1 |
#!/usr/bin/python2.4 |
2 |
# xdelta 3 - delta compression tools and library |
3 |
# Copyright (C) 2003, 2006, 2007. Joshua P. MacDonald |
4 |
# |
5 |
# This program is free software; you can redistribute it and/or modify |
6 |
# it under the terms of the GNU General Public License as published by |
7 |
# the Free Software Foundation; either version 2 of the License, or |
8 |
# (at your option) any later version. |
9 |
# |
10 |
# This program is distributed in the hope that it will be useful, |
11 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
13 |
# GNU General Public License for more details. |
14 |
# |
15 |
# You should have received a copy of the GNU General Public License |
16 |
# along with this program; if not, write to the Free Software |
17 |
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 |
|
19 |
# TODO: test 1.5 vs. greedy |
20 |
|
21 |
import os, sys, math, re, time, types, array, random |
22 |
import xdelta3main |
23 |
import xdelta3 |
24 |
|
25 |
#RCSDIR = '/mnt/polaroid/Polaroid/orbit_linux/home/jmacd/PRCS' |
26 |
RCSDIR = '/tmp/PRCS_read_copy' |
27 |
SAMPLEDIR = "/tmp/WESNOTH_tmp/diff" |
28 |
|
29 |
#RCSDIR = 'G:/jmacd/PRCS/prcs/b' |
30 |
#SAMPLEDIR = "C:/sample_data/Wesnoth/tar" |
31 |
|
32 |
# |
33 |
MIN_SIZE = 0 |
34 |
|
35 |
TIME_TOO_SHORT = 0.050 |
36 |
|
37 |
SKIP_TRIALS = 2 |
38 |
MIN_TRIALS = 3 |
39 |
MAX_TRIALS = 15 |
40 |
|
41 |
SKIP_DECODE = 1 |
42 |
|
43 |
# 10 = fast 1.5 = slow |
44 |
MIN_STDDEV_PCT = 1.5 |
45 |
|
46 |
# How many results per round |
47 |
MAX_RESULTS = 500 |
48 |
TEST_ROUNDS = 500 |
49 |
KEEP_P = (0.5) |
50 |
|
51 |
# For RCS testing, what percent to select |
52 |
FILE_P = (0.30) |
53 |
|
54 |
# For run-speed tests |
55 |
MIN_RUN = 1000 * 1000 * 1 |
56 |
MAX_RUN = 1000 * 1000 * 10 |
57 |
|
58 |
# Testwide defaults |
59 |
ALL_ARGS = [ |
60 |
# -v |
61 |
] |
62 |
|
63 |
# The first 7 args go to -C |
64 |
SOFT_CONFIG_CNT = 7 |
65 |
|
66 |
CONFIG_ORDER = [ 'large_look', |
67 |
'large_step', |
68 |
'small_look', |
69 |
'small_chain', |
70 |
'small_lchain', |
71 |
'max_lazy', |
72 |
'long_enough', |
73 |
|
74 |
# > SOFT_CONFIG_CNT |
75 |
'nocompress', |
76 |
'winsize', |
77 |
'srcwinsize', |
78 |
'sprevsz', |
79 |
'iopt', |
80 |
'djw', |
81 |
'altcode', |
82 |
] |
83 |
|
84 |
CONFIG_ARGMAP = { |
85 |
'winsize' : '-W', |
86 |
'srcwinsize' : '-B', |
87 |
'sprevsz' : '-P', |
88 |
'iopt' : '-I', |
89 |
'nocompress' : '-N', |
90 |
'djw' : '-Sdjw', |
91 |
'altcode' : '-T', |
92 |
} |
93 |
|
94 |
def INPUT_SPEC(rand): |
95 |
return { |
96 |
|
97 |
# Time/space costs: |
98 |
|
99 |
# -C 1,2,3,4,5,6,7 |
100 |
'large_look' : lambda d: rand.choice([9]), |
101 |
'large_step' : lambda d: rand.choice([3, 5, 7, 8, 15]), |
102 |
'small_chain' : lambda d: rand.choice([40, 10, 4, 1]), |
103 |
'small_lchain' : lambda d: rand.choice([x for x in [10, 4, 2, 1] if x <= d['small_chain']]), |
104 |
'max_lazy' : lambda d: rand.choice([9, 18, 27, 36, 72, 108]), |
105 |
'long_enough' : lambda d: rand.choice([9, 18, 27, 36, 72, 108]), |
106 |
'small_look' : lambda d: rand.choice([4]), |
107 |
|
108 |
# -N |
109 |
'nocompress' : lambda d: rand.choice(['true']), |
110 |
|
111 |
# -T |
112 |
'altcode' : lambda d: rand.choice(['false']), |
113 |
|
114 |
# -S djw |
115 |
'djw' : lambda d: rand.choice(['false']), |
116 |
|
117 |
# Memory costs: |
118 |
|
119 |
# -W |
120 |
'winsize' : lambda d: 8 * (1<<20), |
121 |
|
122 |
# -B |
123 |
'srcwinsize' : lambda d: 64 * (1<<20), |
124 |
|
125 |
# -I 0 is unlimited |
126 |
'iopt' : lambda d: 0, |
127 |
|
128 |
# -P only powers of two |
129 |
'sprevsz' : lambda d: rand.choice([x * (1<<16) for x in [4]]), |
130 |
} |
131 |
#end |
132 |
|
133 |
# |
134 |
TMPDIR = '/tmp/xd3regtest.%d' % os.getpid() |
135 |
|
136 |
RUNFILE = os.path.join(TMPDIR, 'run') |
137 |
DFILE = os.path.join(TMPDIR, 'output') |
138 |
RFILE = os.path.join(TMPDIR, 'recon') |
139 |
|
140 |
HEAD_STATE = 0 |
141 |
BAR_STATE = 1 |
142 |
REV_STATE = 2 |
143 |
DATE_STATE = 3 |
144 |
|
145 |
# |
146 |
IGNORE_FILENAME = re.compile('.*\\.(gif|jpg).*') |
147 |
|
148 |
# rcs output |
149 |
RE_TOTREV = re.compile('total revisions: (\\d+)') |
150 |
RE_BAR = re.compile('----------------------------') |
151 |
RE_REV = re.compile('revision (.+)') |
152 |
RE_DATE = re.compile('date: ([^;]+);.*') |
153 |
# xdelta output |
154 |
RE_HDRSZ = re.compile('VCDIFF header size: +(\\d+)') |
155 |
RE_EXTCOMP = re.compile('XDELTA ext comp.*') |
156 |
|
157 |
def c2str(c): |
158 |
return ' '.join(['%s' % x for x in c]) |
159 |
#end |
160 |
|
161 |
def SumList(l): |
162 |
return reduce(lambda x,y: x+y, l) |
163 |
#end |
164 |
|
165 |
# returns (total, mean, stddev, q2 (median), |
166 |
# (q3-q1)/2 ("semi-interquartile range"), max-min (spread)) |
167 |
class StatList: |
168 |
def __init__(self,l,desc): |
169 |
cnt = len(l) |
170 |
assert(cnt > 1) |
171 |
l.sort() |
172 |
self.cnt = cnt |
173 |
self.l = l |
174 |
self.total = SumList(l) |
175 |
self.mean = self.total / float(self.cnt) |
176 |
self.s = math.sqrt(SumList([(x-self.mean) * (x - self.mean) for x in l]) / float(self.cnt-1)) |
177 |
self.q0 = l[0] |
178 |
self.q1 = l[int(self.cnt/4.0+0.5)] |
179 |
self.q2 = l[int(self.cnt/2.0+0.5)] |
180 |
self.q3 = l[min(self.cnt-1,int((3.0*self.cnt)/4.0+0.5))] |
181 |
self.q4 = l[self.cnt-1]+1 |
182 |
self.siqr = (self.q3-self.q1)/2.0; |
183 |
self.spread = (self.q4-self.q0) |
184 |
self.str = '%s %d; mean %d; sdev %d; q2 %d; .5(q3-q1) %.1f; spread %d' % \ |
185 |
(desc, self.total, self.mean, self.s, self.q2, self.siqr, self.spread) |
186 |
#end |
187 |
#end |
188 |
|
189 |
def RunCommand(args, ok = [0]): |
190 |
#print 'run command %s' % (' '.join(args)) |
191 |
p = os.spawnvp(os.P_WAIT, args[0], args) |
192 |
if p not in ok: |
193 |
raise CommandError(args, 'exited %d' % p) |
194 |
#end |
195 |
#end |
196 |
|
197 |
def RunCommandIO(args,infn,outfn): |
198 |
p = os.fork() |
199 |
if p == 0: |
200 |
os.dup2(os.open(infn,os.O_RDONLY),0) |
201 |
os.dup2(os.open(outfn,os.O_CREAT|os.O_TRUNC|os.O_WRONLY),1) |
202 |
os.execvp(args[0], args) |
203 |
else: |
204 |
s = os.waitpid(p,0) |
205 |
o = os.WEXITSTATUS(s[1]) |
206 |
if not os.WIFEXITED(s[1]) or o != 0: |
207 |
raise CommandError(args, 'exited %d' % o) |
208 |
#end |
209 |
#end |
210 |
#end |
211 |
|
212 |
class TimedTest: |
213 |
def __init__(self, target, source, runnable, |
214 |
skip_trials = SKIP_TRIALS, |
215 |
min_trials = MIN_TRIALS, |
216 |
max_trials = MAX_TRIALS, |
217 |
min_stddev_pct = MIN_STDDEV_PCT): |
218 |
self.target = target |
219 |
self.source = source |
220 |
self.runnable = runnable |
221 |
|
222 |
self.skip_trials = skip_trials |
223 |
self.min_trials = min(min_trials, max_trials) |
224 |
self.max_trials = max_trials |
225 |
self.min_stddev_pct = min_stddev_pct |
226 |
|
227 |
self.encode_time = self.DoTest(DFILE, |
228 |
lambda x: x.Encode(self.target, self.source, DFILE)) |
229 |
self.encode_size = runnable.EncodeSize(DFILE) |
230 |
|
231 |
if SKIP_DECODE: |
232 |
self.decode_time = StatList([1, 1], 'not decoded') |
233 |
return |
234 |
#end |
235 |
|
236 |
self.decode_time = self.DoTest(RFILE, |
237 |
lambda x: x.Decode(DFILE, self.source, RFILE), |
238 |
) |
239 |
|
240 |
# verify |
241 |
runnable.Verify(self.target, RFILE) |
242 |
#end |
243 |
|
244 |
def DoTest(self, fname, func): |
245 |
trials = 0 |
246 |
measured = [] |
247 |
|
248 |
while 1: |
249 |
try: |
250 |
os.remove(fname) |
251 |
except OSError: |
252 |
pass |
253 |
|
254 |
start_time = time.time() |
255 |
start_clock = time.clock() |
256 |
|
257 |
func(self.runnable) |
258 |
|
259 |
total_clock = (time.clock() - start_clock) |
260 |
total_time = (time.time() - start_time) |
261 |
|
262 |
elap_time = max(total_time, 0.0000001) |
263 |
elap_clock = max(total_clock, 0.0000001) |
264 |
|
265 |
trials = trials + 1 |
266 |
|
267 |
# skip some of the first trials |
268 |
if trials > self.skip_trials: |
269 |
measured.append((elap_clock, elap_time)) |
270 |
#print 'measurement total: %.1f ms' % (total_time * 1000.0) |
271 |
|
272 |
# at least so many |
273 |
if trials < (self.skip_trials + self.min_trials): |
274 |
#print 'continue: need more trials: %d' % trials |
275 |
continue |
276 |
|
277 |
# compute %variance |
278 |
done = 0 |
279 |
if self.skip_trials + self.min_trials <= 2: |
280 |
measured = measured + measured; |
281 |
done = 1 |
282 |
#end |
283 |
|
284 |
time_stat = StatList([x[1] for x in measured], 'elap time') |
285 |
sp = float(time_stat.s) / float(time_stat.mean) |
286 |
|
287 |
# what if MAX_TRIALS is exceeded? |
288 |
too_many = (trials - self.skip_trials) >= self.max_trials |
289 |
good = (100.0 * sp) < self.min_stddev_pct |
290 |
if done or too_many or good: |
291 |
trials = trials - self.skip_trials |
292 |
if not done and not good: |
293 |
#print 'too many trials: %d' % trials |
294 |
pass |
295 |
#clock = StatList([x[0] for x in measured], 'elap clock') |
296 |
return time_stat |
297 |
#end |
298 |
#end |
299 |
#end |
300 |
#end |
301 |
|
302 |
def Decimals(start, end): |
303 |
l = [] |
304 |
step = start |
305 |
while 1: |
306 |
r = range(step, step * 10, step) |
307 |
l = l + r |
308 |
if step * 10 >= end: |
309 |
l.append(step * 10) |
310 |
break |
311 |
step = step * 10 |
312 |
return l |
313 |
#end |
314 |
|
315 |
# This tests the raw speed of 0-byte inputs |
316 |
def RunSpeedTest(): |
317 |
for L in Decimals(MIN_RUN, MAX_RUN): |
318 |
SetFileSize(RUNFILE, L) |
319 |
|
320 |
trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<20)])) |
321 |
ReportSpeed(L, trx, '1MB ') |
322 |
|
323 |
trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<19)])) |
324 |
ReportSpeed(L, trx, '512k') |
325 |
|
326 |
trx = TimedTest(RUNFILE, None, Xdelta3Runner(['-W', str(1<<18)])) |
327 |
ReportSpeed(L, trx, '256k') |
328 |
|
329 |
trm = TimedTest(RUNFILE, None, Xdelta3Mod1(RUNFILE)) |
330 |
ReportSpeed(L, trm, 'swig') |
331 |
|
332 |
trg = TimedTest(RUNFILE, None, GzipRun1()) |
333 |
ReportSpeed(L,trg,'gzip') |
334 |
#end |
335 |
#end |
336 |
|
337 |
def SetFileSize(F,L): |
338 |
fd = os.open(F, os.O_CREAT | os.O_WRONLY) |
339 |
os.ftruncate(fd,L) |
340 |
assert os.fstat(fd).st_size == L |
341 |
os.close(fd) |
342 |
#end |
343 |
|
344 |
def ReportSpeed(L,tr,desc): |
345 |
print '%s run length %u: size %u: time %.3f ms: decode %.3f ms' % \ |
346 |
(desc, L, |
347 |
tr.encode_size, |
348 |
tr.encode_time.mean * 1000.0, |
349 |
tr.decode_time.mean * 1000.0) |
350 |
#end |
351 |
|
352 |
class Xdelta3RunClass: |
353 |
def __init__(self, extra): |
354 |
self.extra = extra |
355 |
#end |
356 |
|
357 |
def __str__(self): |
358 |
return ' '.join(self.extra) |
359 |
#end |
360 |
|
361 |
def New(self): |
362 |
return Xdelta3Runner(self.extra) |
363 |
#end |
364 |
#end |
365 |
|
366 |
class Xdelta3Runner: |
367 |
def __init__(self, extra): |
368 |
self.extra = extra |
369 |
#end |
370 |
|
371 |
def Encode(self, target, source, output): |
372 |
args = (ALL_ARGS + |
373 |
self.extra + |
374 |
['-e']) |
375 |
if source: |
376 |
args.append('-s') |
377 |
args.append(source) |
378 |
#end |
379 |
args = args + [target, output] |
380 |
self.Main(args) |
381 |
#end |
382 |
|
383 |
def Decode(self, input, source, output): |
384 |
args = (ALL_ARGS + |
385 |
['-d']) |
386 |
if source: |
387 |
args.append('-s') |
388 |
args.append(source) |
389 |
#end |
390 |
args = args + [input, output] |
391 |
self.Main(args) |
392 |
#end |
393 |
|
394 |
def Verify(self, target, recon): |
395 |
RunCommand(('cmp', target, recon)) |
396 |
#end |
397 |
|
398 |
def EncodeSize(self, output): |
399 |
return os.stat(output).st_size |
400 |
#end |
401 |
|
402 |
def Main(self, args): |
403 |
try: |
404 |
xdelta3main.main(args) |
405 |
except Exception, e: |
406 |
raise CommandError(args, "xdelta3.main exception") |
407 |
#end |
408 |
#end |
409 |
#end |
410 |
|
411 |
class Xdelta3Mod1: |
412 |
def __init__(self, file): |
413 |
self.target_data = open(file, 'r').read() |
414 |
#end |
415 |
|
416 |
def Encode(self, ignore1, ignore2, ignore3): |
417 |
r1, encoded = xdelta3.xd3_encode_memory(self.target_data, None, 1000000, 1<<10) |
418 |
if r1 != 0: |
419 |
raise CommandError('memory', 'encode failed: %s' % r1) |
420 |
#end |
421 |
self.encoded = encoded |
422 |
#end |
423 |
|
424 |
def Decode(self, ignore1, ignore2, ignore3): |
425 |
r2, data1 = xdelta3.xd3_decode_memory(self.encoded, None, len(self.target_data)) |
426 |
if r2 != 0: |
427 |
raise CommandError('memory', 'decode failed: %s' % r1) |
428 |
#end |
429 |
self.decoded = data1 |
430 |
#end |
431 |
|
432 |
def Verify(self, ignore1, ignore2): |
433 |
if self.target_data != self.decoded: |
434 |
raise CommandError('memory', 'bad decode') |
435 |
#end |
436 |
#end |
437 |
|
438 |
def EncodeSize(self, ignore1): |
439 |
return len(self.encoded) |
440 |
#end |
441 |
#end |
442 |
|
443 |
class GzipRun1: |
444 |
def Encode(self, target, source, output): |
445 |
assert source == None |
446 |
RunCommandIO(['gzip', '-cf'], target, output) |
447 |
#end |
448 |
|
449 |
def Decode(self, input, source, output): |
450 |
assert source == None |
451 |
RunCommandIO(['gzip', '-dcf'], input, output) |
452 |
#end |
453 |
|
454 |
def Verify(self, target, recon): |
455 |
RunCommand(('cmp', target, recon)) |
456 |
#end |
457 |
|
458 |
def EncodeSize(self, output): |
459 |
return os.stat(output).st_size |
460 |
#end |
461 |
#end |
462 |
|
463 |
class Xdelta1RunClass: |
464 |
def __str__(self): |
465 |
return 'xdelta1' |
466 |
#end |
467 |
|
468 |
def New(self): |
469 |
return Xdelta1Runner() |
470 |
#end |
471 |
#end |
472 |
|
473 |
class Xdelta1Runner: |
474 |
def Encode(self, target, source, output): |
475 |
assert source != None |
476 |
args = ['xdelta1', 'delta', '-q', source, target, output] |
477 |
RunCommand(args, [0, 1]) |
478 |
#end |
479 |
|
480 |
def Decode(self, input, source, output): |
481 |
assert source != None |
482 |
args = ['xdelta1', 'patch', '-q', input, source, output] |
483 |
# Note: for dumb historical reasons, xdelta1 returns 1 or 0 |
484 |
RunCommand(args) |
485 |
#end |
486 |
|
487 |
def Verify(self, target, recon): |
488 |
RunCommand(('cmp', target, recon)) |
489 |
#end |
490 |
|
491 |
def EncodeSize(self, output): |
492 |
return os.stat(output).st_size |
493 |
#end |
494 |
#end |
495 |
|
496 |
# exceptions |
497 |
class SkipRcsException: |
498 |
def __init__(self,reason): |
499 |
self.reason = reason |
500 |
#end |
501 |
#end |
502 |
|
503 |
class NotEnoughVersions: |
504 |
def __init__(self): |
505 |
pass |
506 |
#end |
507 |
#end |
508 |
|
509 |
class CommandError: |
510 |
def __init__(self,cmd,str): |
511 |
if type(cmd) is types.TupleType or \ |
512 |
type(cmd) is types.ListType: |
513 |
cmd = reduce(lambda x,y: '%s %s' % (x,y),cmd) |
514 |
#end |
515 |
print 'command was: ',cmd |
516 |
print 'command failed: ',str |
517 |
print 'have fun debugging' |
518 |
#end |
519 |
#end |
520 |
|
521 |
class RcsVersion: |
522 |
def __init__(self,vstr): |
523 |
self.vstr = vstr |
524 |
#end |
525 |
def __cmp__(self,other): |
526 |
return cmp(self.date, other.date) |
527 |
#end |
528 |
def __str__(self): |
529 |
return str(self.vstr) |
530 |
#end |
531 |
#end |
532 |
|
533 |
class RcsFile: |
534 |
|
535 |
def __init__(self, fname): |
536 |
self.fname = fname |
537 |
self.versions = [] |
538 |
self.state = HEAD_STATE |
539 |
#end |
540 |
|
541 |
def SetTotRev(self,s): |
542 |
self.totrev = int(s) |
543 |
#end |
544 |
|
545 |
def Rev(self,s): |
546 |
self.rev = RcsVersion(s) |
547 |
if len(self.versions) >= self.totrev: |
548 |
raise SkipRcsException('too many versions (in log messages)') |
549 |
#end |
550 |
self.versions.append(self.rev) |
551 |
#end |
552 |
|
553 |
def Date(self,s): |
554 |
self.rev.date = s |
555 |
#end |
556 |
|
557 |
def Match(self, line, state, rx, gp, newstate, f): |
558 |
if state == self.state: |
559 |
m = rx.match(line) |
560 |
if m: |
561 |
if f: |
562 |
f(m.group(gp)) |
563 |
#end |
564 |
self.state = newstate |
565 |
return 1 |
566 |
#end |
567 |
#end |
568 |
return None |
569 |
#end |
570 |
|
571 |
def Sum1Rlog(self): |
572 |
f = os.popen('rlog '+self.fname, "r") |
573 |
l = f.readline() |
574 |
while l: |
575 |
if self.Match(l, HEAD_STATE, RE_TOTREV, 1, BAR_STATE, self.SetTotRev): |
576 |
pass |
577 |
elif self.Match(l, BAR_STATE, RE_BAR, 1, REV_STATE, None): |
578 |
pass |
579 |
elif self.Match(l, REV_STATE, RE_REV, 1, DATE_STATE, self.Rev): |
580 |
pass |
581 |
elif self.Match(l, DATE_STATE, RE_DATE, 1, BAR_STATE, self.Date): |
582 |
pass |
583 |
#end |
584 |
l = f.readline() |
585 |
#end |
586 |
c = f.close() |
587 |
if c != None: |
588 |
raise c |
589 |
#end |
590 |
#end |
591 |
|
592 |
def Sum1(self): |
593 |
st = os.stat(self.fname) |
594 |
self.rcssize = st.st_size |
595 |
self.Sum1Rlog() |
596 |
if self.totrev != len(self.versions): |
597 |
raise SkipRcsException('wrong version count') |
598 |
#end |
599 |
self.versions.sort() |
600 |
#end |
601 |
|
602 |
def Checkout(self,n): |
603 |
v = self.versions[n] |
604 |
out = open(self.Verf(n), "w") |
605 |
cmd = 'co -ko -p%s %s' % (v.vstr, self.fname) |
606 |
total = 0 |
607 |
(inf, |
608 |
stream, |
609 |
err) = os.popen3(cmd, "r") |
610 |
inf.close() |
611 |
buf = stream.read() |
612 |
while buf: |
613 |
total = total + len(buf) |
614 |
out.write(buf) |
615 |
buf = stream.read() |
616 |
#end |
617 |
v.vsize = total |
618 |
estr = '' |
619 |
buf = err.read() |
620 |
while buf: |
621 |
estr = estr + buf |
622 |
buf = err.read() |
623 |
#end |
624 |
if stream.close(): |
625 |
raise CommandError(cmd, 'checkout failed: %s\n%s\n%s' % (v.vstr, self.fname, estr)) |
626 |
#end |
627 |
out.close() |
628 |
err.close() |
629 |
#end |
630 |
|
631 |
def Vdate(self,n): |
632 |
return self.versions[n].date |
633 |
#end |
634 |
|
635 |
def Vstr(self,n): |
636 |
return self.versions[n].vstr |
637 |
#end |
638 |
|
639 |
def Verf(self,n): |
640 |
return os.path.join(TMPDIR, 'input.%d' % n) |
641 |
#end |
642 |
|
643 |
def FilePairsByDate(self, runclass): |
644 |
if self.totrev < 2: |
645 |
raise NotEnoughVersions() |
646 |
#end |
647 |
self.Checkout(0) |
648 |
ntrials = [] |
649 |
if self.totrev < 2: |
650 |
return vtrials |
651 |
#end |
652 |
for v in range(0,self.totrev-1): |
653 |
if v > 1: |
654 |
os.remove(self.Verf(v-1)) |
655 |
#end |
656 |
self.Checkout(v+1) |
657 |
if os.stat(self.Verf(v)).st_size < MIN_SIZE or \ |
658 |
os.stat(self.Verf(v+1)).st_size < MIN_SIZE: |
659 |
continue |
660 |
#end |
661 |
|
662 |
result = TimedTest(self.Verf(v+1), |
663 |
self.Verf(v), |
664 |
runclass.New()) |
665 |
|
666 |
target_size = os.stat(self.Verf(v+1)).st_size |
667 |
|
668 |
ntrials.append(result) |
669 |
#end |
670 |
|
671 |
os.remove(self.Verf(self.totrev-1)) |
672 |
os.remove(self.Verf(self.totrev-2)) |
673 |
return ntrials |
674 |
#end |
675 |
|
676 |
def AppendVersion(self, f, n): |
677 |
self.Checkout(n) |
678 |
rf = open(self.Verf(n), "r") |
679 |
data = rf.read() |
680 |
f.write(data) |
681 |
rf.close() |
682 |
return len(data) |
683 |
#end |
684 |
|
685 |
class RcsFinder: |
686 |
def __init__(self): |
687 |
self.subdirs = [] |
688 |
self.rcsfiles = [] |
689 |
self.others = [] |
690 |
self.skipped = [] |
691 |
self.biground = 0 |
692 |
#end |
693 |
|
694 |
def Scan1(self,dir): |
695 |
dents = os.listdir(dir) |
696 |
subdirs = [] |
697 |
rcsfiles = [] |
698 |
others = [] |
699 |
for dent in dents: |
700 |
full = os.path.join(dir, dent) |
701 |
if os.path.isdir(full): |
702 |
subdirs.append(full) |
703 |
elif dent[len(dent)-2:] == ",v": |
704 |
rcsfiles.append(RcsFile(full)) |
705 |
else: |
706 |
others.append(full) |
707 |
#end |
708 |
#end |
709 |
self.subdirs = self.subdirs + subdirs |
710 |
self.rcsfiles = self.rcsfiles + rcsfiles |
711 |
self.others = self.others + others |
712 |
return subdirs |
713 |
#end |
714 |
|
715 |
def Crawl(self, dir): |
716 |
subdirs = [dir] |
717 |
while subdirs: |
718 |
s1 = self.Scan1(subdirs[0]) |
719 |
subdirs = subdirs[1:] + s1 |
720 |
#end |
721 |
#end |
722 |
|
723 |
def Summarize(self): |
724 |
good = [] |
725 |
for rf in self.rcsfiles: |
726 |
try: |
727 |
rf.Sum1() |
728 |
if rf.totrev < 2: |
729 |
raise SkipRcsException('too few versions (< 2)') |
730 |
#end |
731 |
except SkipRcsException, e: |
732 |
#print 'skipping file %s: %s' % (rf.fname, e.reason) |
733 |
self.skipped.append(rf) |
734 |
else: |
735 |
good.append(rf) |
736 |
#end |
737 |
self.rcsfiles = good |
738 |
#end |
739 |
|
740 |
def AllPairsByDate(self, runclass): |
741 |
results = [] |
742 |
good = [] |
743 |
for rf in self.rcsfiles: |
744 |
try: |
745 |
results = results + rf.FilePairsByDate(runclass) |
746 |
except SkipRcsException: |
747 |
print 'file %s has compressed versions: skipping' % (rf.fname) |
748 |
except NotEnoughVersions: |
749 |
print 'testing %s on %s: not enough versions' % (runclass, rf.fname) |
750 |
else: |
751 |
good.append(rf) |
752 |
#end |
753 |
self.rcsfiles = good |
754 |
self.ReportPairs(runclass, results) |
755 |
return results |
756 |
#end |
757 |
|
758 |
def ReportPairs(self, name, results): |
759 |
encode_time = 0 |
760 |
decode_time = 0 |
761 |
encode_size = 0 |
762 |
for r in results: |
763 |
encode_time += r.encode_time.mean |
764 |
decode_time += r.decode_time.mean |
765 |
encode_size += r.encode_size |
766 |
#end |
767 |
print '%s rcs: encode %.2f s: decode %.2f s: size %d' % \ |
768 |
(name, encode_time, decode_time, encode_size) |
769 |
#end |
770 |
|
771 |
def MakeBigFiles(self, rand): |
772 |
f1 = open(TMPDIR + "/big.1", "w") |
773 |
f2 = open(TMPDIR + "/big.2", "w") |
774 |
population = [] |
775 |
for file in self.rcsfiles: |
776 |
if len(file.versions) < 2: |
777 |
continue |
778 |
population.append(file) |
779 |
#end |
780 |
f1sz = 0 |
781 |
f2sz = 0 |
782 |
fcount = int(len(population) * FILE_P) |
783 |
assert fcount > 0 |
784 |
for file in rand.sample(population, fcount): |
785 |
m = IGNORE_FILENAME.match(file.fname) |
786 |
if m != None: |
787 |
continue |
788 |
#end |
789 |
r1, r2 = rand.sample(xrange(0, len(file.versions)), 2) |
790 |
f1sz += file.AppendVersion(f1, r1) |
791 |
f2sz += file.AppendVersion(f2, r2) |
792 |
#m.update('%s,%s,%s ' % (file.fname[len(RCSDIR):], file.Vstr(r1), file.Vstr(r2))) |
793 |
#end |
794 |
testkey = 'rcs%d' % self.biground |
795 |
self.biground = self.biground + 1 |
796 |
|
797 |
print '%s; source %u bytes; target %u bytes' % (testkey, f1sz, f2sz) |
798 |
f1.close() |
799 |
f2.close() |
800 |
return (TMPDIR + "/big.1", |
801 |
TMPDIR + "/big.2", |
802 |
testkey) |
803 |
#end |
804 |
|
805 |
def Generator(self): |
806 |
return lambda rand: self.MakeBigFiles(rand) |
807 |
#end |
808 |
#end |
809 |
|
810 |
# find a set of RCS files for testing |
811 |
def GetTestRcsFiles(): |
812 |
rcsf = RcsFinder() |
813 |
rcsf.Crawl(RCSDIR) |
814 |
if len(rcsf.rcsfiles) == 0: |
815 |
raise CommandError('', 'no RCS files') |
816 |
#end |
817 |
rcsf.Summarize() |
818 |
print "rcsfiles: rcsfiles %d; subdirs %d; others %d; skipped %d" % (len(rcsf.rcsfiles), |
819 |
len(rcsf.subdirs), |
820 |
len(rcsf.others), |
821 |
len(rcsf.skipped)) |
822 |
print StatList([x.rcssize for x in rcsf.rcsfiles], "rcssize").str |
823 |
print StatList([x.totrev for x in rcsf.rcsfiles], "totrev").str |
824 |
return rcsf |
825 |
#end |
826 |
|
827 |
class SampleDataTest: |
828 |
def __init__(self, dirs): |
829 |
self.pairs = [] |
830 |
while dirs: |
831 |
d = dirs[0] |
832 |
dirs = dirs[1:] |
833 |
l = os.listdir(d) |
834 |
files = [] |
835 |
for e in l: |
836 |
p = os.path.join(d, e) |
837 |
if os.path.isdir(p): |
838 |
dirs.append(p) |
839 |
else: |
840 |
files.append(p) |
841 |
#end |
842 |
#end |
843 |
if len(files) > 1: |
844 |
files.sort() |
845 |
for x in xrange(len(files) - 1): |
846 |
self.pairs.append((files[x], files[x+1], |
847 |
'%s-%s' % (files[x], files[x+1]))) |
848 |
#end |
849 |
#end |
850 |
#end |
851 |
#end |
852 |
|
853 |
def Generator(self): |
854 |
return lambda rand: rand.choice(self.pairs) |
855 |
#end |
856 |
#end |
857 |
|
858 |
# configs are represented as a list of values, |
859 |
# program takes a list of strings: |
860 |
def ConfigToArgs(config): |
861 |
args = [ '-C', |
862 |
','.join([str(x) for x in config[0:SOFT_CONFIG_CNT]])] |
863 |
for i in range(SOFT_CONFIG_CNT, len(CONFIG_ORDER)): |
864 |
key = CONFIG_ARGMAP[CONFIG_ORDER[i]] |
865 |
val = config[i] |
866 |
if val == 'true' or val == 'false': |
867 |
if val == 'true': |
868 |
args.append('%s' % key) |
869 |
#end |
870 |
else: |
871 |
args.append('%s=%s' % (key, val)) |
872 |
#end |
873 |
#end |
874 |
return args |
875 |
#end |
876 |
|
877 |
# |
878 |
class RandomTest: |
879 |
def __init__(self, tnum, tinput, config, syntuple = None): |
880 |
self.mytinput = tinput[2] |
881 |
self.myconfig = config |
882 |
self.tnum = tnum |
883 |
|
884 |
if syntuple != None: |
885 |
self.runtime = syntuple[0] |
886 |
self.compsize = syntuple[1] |
887 |
self.decodetime = None |
888 |
else: |
889 |
args = ConfigToArgs(config) |
890 |
result = TimedTest(tinput[1], tinput[0], Xdelta3Runner(args)) |
891 |
|
892 |
self.runtime = result.encode_time.mean |
893 |
self.compsize = result.encode_size |
894 |
self.decodetime = result.decode_time.mean |
895 |
#end |
896 |
|
897 |
self.score = None |
898 |
self.time_pos = None |
899 |
self.size_pos = None |
900 |
self.score_pos = None |
901 |
#end |
902 |
|
903 |
def __str__(self): |
904 |
decodestr = '' |
905 |
if not SKIP_DECODE: |
906 |
decodestr = ' %.6f' % self.decodetime |
907 |
#end |
908 |
return 'time %.6f%s size %d%s << %s >>%s' % ( |
909 |
self.time(), ((self.time_pos != None) and (" (%s)" % self.time_pos) or ""), |
910 |
self.size(), ((self.size_pos != None) and (" (%s)" % self.size_pos) or ""), |
911 |
c2str(self.config()), |
912 |
decodestr) |
913 |
#end |
914 |
|
915 |
def time(self): |
916 |
return self.runtime |
917 |
#end |
918 |
|
919 |
def size(self): |
920 |
return self.compsize |
921 |
#end |
922 |
|
923 |
def config(self): |
924 |
return self.myconfig |
925 |
#end |
926 |
|
927 |
def score(self): |
928 |
return self.score |
929 |
#end |
930 |
|
931 |
def tinput(self): |
932 |
return self.mytinput |
933 |
#end |
934 |
#end |
935 |
|
936 |
def PosInAlist(l, e): |
937 |
for i in range(0, len(l)): |
938 |
if l[i][1] == e: |
939 |
return i; |
940 |
#end |
941 |
#end |
942 |
return -1 |
943 |
#end |
944 |
|
945 |
# Generates a set of num_results test configurations, given the list of |
946 |
# retest-configs. |
947 |
def RandomTestConfigs(rand, input_configs, num_results): |
948 |
|
949 |
outputs = input_configs[:] |
950 |
have_set = dict([(c,c) for c in input_configs]) |
951 |
|
952 |
# Compute a random configuration |
953 |
def RandomConfig(): |
954 |
config = [] |
955 |
cmap = {} |
956 |
for key in CONFIG_ORDER: |
957 |
val = cmap[key] = (INPUT_SPEC(rand)[key])(cmap) |
958 |
config.append(val) |
959 |
#end |
960 |
return tuple(config) |
961 |
#end |
962 |
|
963 |
while len(outputs) < num_results: |
964 |
newc = None |
965 |
for i in xrange(10): |
966 |
c = RandomConfig() |
967 |
if have_set.has_key(c): |
968 |
continue |
969 |
#end |
970 |
have_set[c] = c |
971 |
newc = c |
972 |
break |
973 |
if newc is None: |
974 |
print 'stopped looking for configs at %d' % len(outputs) |
975 |
break |
976 |
#end |
977 |
outputs.append(c) |
978 |
#end |
979 |
outputs.sort() |
980 |
return outputs |
981 |
#end |
982 |
|
983 |
def RunTestLoop(rand, generator, rounds): |
984 |
configs = [] |
985 |
for rnum in xrange(rounds): |
986 |
configs = RandomTestConfigs(rand, configs, MAX_RESULTS) |
987 |
tinput = generator(rand) |
988 |
tests = [] |
989 |
for x in xrange(len(configs)): |
990 |
t = RandomTest(x, tinput, configs[x]) |
991 |
print 'Round %d test %d: %s' % (rnum, x, t) |
992 |
tests.append(t) |
993 |
#end |
994 |
results = ScoreTests(tests) |
995 |
|
996 |
for r in results: |
997 |
c = r.config() |
998 |
if not test_all_config_results.has_key(c): |
999 |
test_all_config_results[c] = [r] |
1000 |
else: |
1001 |
test_all_config_results[c].append(r) |
1002 |
#end |
1003 |
#end |
1004 |
|
1005 |
GraphResults('expt%d' % rnum, results) |
1006 |
GraphSummary('sum%d' % rnum, results) |
1007 |
|
1008 |
# re-test some fraction |
1009 |
configs = [r.config() for r in results[0:int(MAX_RESULTS * KEEP_P)]] |
1010 |
#end |
1011 |
#end |
1012 |
|
1013 |
# TODO: cleanup |
1014 |
test_all_config_results = {} |
1015 |
|
1016 |
def ScoreTests(results): |
1017 |
scored = [] |
1018 |
timed = [] |
1019 |
sized = [] |
1020 |
|
1021 |
t_min = float(min([test.time() for test in results])) |
1022 |
#t_max = float(max([test.time() for test in results])) |
1023 |
s_min = float(min([test.size() for test in results])) |
1024 |
#s_max = float(max([test.size() for test in results])) |
1025 |
|
1026 |
for test in results: |
1027 |
|
1028 |
# Hyperbolic function. Smaller scores still better |
1029 |
red = 0.999 # minimum factors for each dimension are 1/1000 |
1030 |
test.score = ((test.size() - s_min * red) * |
1031 |
(test.time() - t_min * red)) |
1032 |
|
1033 |
scored.append((test.score, test)) |
1034 |
timed.append((test.time(), test)) |
1035 |
sized.append((test.size(), test)) |
1036 |
#end |
1037 |
|
1038 |
scored.sort() |
1039 |
timed.sort() |
1040 |
sized.sort() |
1041 |
|
1042 |
best_by_size = [] |
1043 |
best_by_time = [] |
1044 |
|
1045 |
pos = 0 |
1046 |
for (score, test) in scored: |
1047 |
pos += 1 |
1048 |
test.score_pos = pos |
1049 |
#end |
1050 |
|
1051 |
scored = [x[1] for x in scored] |
1052 |
|
1053 |
for test in scored: |
1054 |
test.size_pos = PosInAlist(sized, test) |
1055 |
test.time_pos = PosInAlist(timed, test) |
1056 |
#end |
1057 |
|
1058 |
for test in scored: |
1059 |
c = test.config() |
1060 |
s = 0.0 |
1061 |
print 'H-Score: %0.9f %s' % (test.score, test) |
1062 |
#end |
1063 |
|
1064 |
return scored |
1065 |
#end |
1066 |
|
1067 |
def GraphResults(desc, results): |
1068 |
f = open("data-%s.csv" % desc, "w") |
1069 |
for r in results: |
1070 |
f.write("%0.9f\t%d\t# %s\n" % (r.time(), r.size(), r)) |
1071 |
#end |
1072 |
f.close() |
1073 |
os.system("./plot.sh data-%s.csv plot-%s.jpg" % (desc, desc)) |
1074 |
#end |
1075 |
|
1076 |
def GraphSummary(desc, results_ignore): |
1077 |
test_population = 0 |
1078 |
config_ordered = [] |
1079 |
|
1080 |
# drops duplicate test/config pairs (TODO: don't retest them) |
1081 |
for config, cresults in test_all_config_results.items(): |
1082 |
input_config_map = {} |
1083 |
uniq = [] |
1084 |
for test in cresults: |
1085 |
assert test.config() == config |
1086 |
test_population += 1 |
1087 |
key = test.tinput() |
1088 |
if not input_config_map.has_key(key): |
1089 |
input_config_map[key] = {} |
1090 |
#end |
1091 |
if input_config_map[key].has_key(config): |
1092 |
print 'skipping repeat test %s vs. %s' % (input_config_map[key][config], test) |
1093 |
continue |
1094 |
#end |
1095 |
input_config_map[key][config] = test |
1096 |
uniq.append(test) |
1097 |
#end |
1098 |
config_ordered.append(uniq) |
1099 |
#end |
1100 |
|
1101 |
# sort configs descending by number of tests |
1102 |
config_ordered.sort(lambda x, y: len(y) - len(x)) |
1103 |
|
1104 |
print 'population %d: %d configs %d results' % \ |
1105 |
(test_population, |
1106 |
len(config_ordered), |
1107 |
len(config_ordered[0])) |
1108 |
|
1109 |
if config_ordered[0] == 1: |
1110 |
return |
1111 |
#end |
1112 |
|
1113 |
# a map from test-key to test-list w/ various configs |
1114 |
input_set = {} |
1115 |
osize = len(config_ordered) |
1116 |
|
1117 |
for i in xrange(len(config_ordered)): |
1118 |
config = config_ordered[i][0].config() |
1119 |
config_tests = config_ordered[i] |
1120 |
|
1121 |
#print '%s has %d tested inputs' % (config, len(config_tests)) |
1122 |
|
1123 |
if len(input_set) == 0: |
1124 |
input_set = dict([(t.tinput(), [t]) for t in config_tests]) |
1125 |
continue |
1126 |
#end |
1127 |
|
1128 |
# a map from test-key to test-list w/ various configs |
1129 |
update_set = {} |
1130 |
for r in config_tests: |
1131 |
t = r.tinput() |
1132 |
if input_set.has_key(t): |
1133 |
update_set[t] = input_set[t] + [r] |
1134 |
else: |
1135 |
#print 'config %s does not have test %s' % (config, t) |
1136 |
pass |
1137 |
#end |
1138 |
#end |
1139 |
|
1140 |
if len(update_set) <= 1: |
1141 |
break |
1142 |
#end |
1143 |
|
1144 |
input_set = update_set |
1145 |
|
1146 |
# continue if there are more w/ the same number of inputs |
1147 |
if i < (len(config_ordered) - 1) and \ |
1148 |
len(config_ordered[i + 1]) == len(config_tests): |
1149 |
continue |
1150 |
#end |
1151 |
|
1152 |
# synthesize results for multi-test inputs |
1153 |
config_num = None |
1154 |
|
1155 |
# map of config to sum(various test-keys) |
1156 |
smap = {} |
1157 |
for (key, tests) in input_set.items(): |
1158 |
if config_num == None: |
1159 |
# config_num should be the same in all elements |
1160 |
config_num = len(tests) |
1161 |
smap = dict([(r.config(), |
1162 |
(r.time(), |
1163 |
r.size())) |
1164 |
for r in tests]) |
1165 |
else: |
1166 |
# compuate the per-config sum of time/size |
1167 |
assert config_num == len(tests) |
1168 |
smap = dict([(r.config(), |
1169 |
(smap[r.config()][0] + r.time(), |
1170 |
smap[r.config()][1] + r.size())) |
1171 |
for r in tests]) |
1172 |
#end |
1173 |
#end |
1174 |
|
1175 |
if config_num == 1: |
1176 |
continue |
1177 |
#end |
1178 |
|
1179 |
if len(input_set) == osize: |
1180 |
break |
1181 |
#end |
1182 |
|
1183 |
summary = '%s-%d' % (desc, len(input_set)) |
1184 |
osize = len(input_set) |
1185 |
|
1186 |
print 'generate %s w/ %d configs' % (summary, config_num) |
1187 |
syn = [RandomTest(0, (None, None, summary), config, |
1188 |
syntuple = (smap[config][0], smap[config][1])) |
1189 |
for config in smap.keys()] |
1190 |
syn = ScoreTests(syn) |
1191 |
#print 'smap is %s' % (smap,) |
1192 |
#print 'syn is %s' % (' and '.join([str(x) for x in syn])) |
1193 |
GraphResults(summary, syn) |
1194 |
#end |
1195 |
#end |
1196 |
|
1197 |
if __name__ == "__main__": |
1198 |
try: |
1199 |
RunCommand(['rm', '-rf', TMPDIR]) |
1200 |
os.mkdir(TMPDIR) |
1201 |
|
1202 |
rcsf = GetTestRcsFiles() |
1203 |
generator = rcsf.Generator() |
1204 |
|
1205 |
#sample = SampleDataTest([SAMPLEDIR]) |
1206 |
#generator = sample.Generator() |
1207 |
|
1208 |
rand = random.Random(135135135135135) |
1209 |
RunTestLoop(rand, generator, TEST_ROUNDS) |
1210 |
|
1211 |
#RunSpeedTest() |
1212 |
|
1213 |
#x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9'])) |
1214 |
#x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-S', 'djw'])) |
1215 |
#x3r = rcsf.AllPairsByDate(Xdelta3RunClass(['-9', '-T'])) |
1216 |
|
1217 |
#x1r = rcsf.AllPairsByDate(Xdelta1RunClass()) |
1218 |
|
1219 |
except CommandError: |
1220 |
pass |
1221 |
else: |
1222 |
RunCommand(['rm', '-rf', TMPDIR]) |
1223 |
pass |
1224 |
#end |
1225 |
#end |