curious benchmark
Aug. 30th, 2010 02:24 pm![[personal profile]](https://www.dreamwidth.org/img/silk/identity/user.png)
Tried to figure out whether
here's what I got:
Oh fcuk! The pattern was wrong, it was successfully failing on the first character!
Here are the right results:
The first parameter is the sample length ("a", "ab", "abcd" etc); the second is the length of a randomly-built string that, in half of the cases, contains the sample at a random position.
String.contains()
is better or worse than Pattern.compile()
(do this once) .matcher().matches()
:here's what I got:
StringContains(ab,1000): 0.4291536470 sec/million PatternMatches(ab,1000): 0.1891851651 sec/million StringContains(abcd,1000): 0.4248620093 sec/million PatternMatches(abcd,1000): 0.2675056776 sec/million StringContains(abcdefgh,1000): 0.4405976392 sec/million PatternMatches(abcdefgh,1000): 0.1933575138 sec/million
Oh fcuk! The pattern was wrong, it was successfully failing on the first character!
Here are the right results:
StringContains(1,5000): 0.2350807750 sec/million PatternMatches(1,5000): 74.5895135201 sec/million StringContains(2,5000): 1.8787401950 sec/million PatternMatches(2,5000): 154.4377975827 sec/million StringContains(4,5000): 2.3345991795 sec/million PatternMatches(4,5000): 169.6984495178 sec/million StringContains(8,5000): 1.6679779701 sec/million PatternMatches(8,5000): 105.2310321675 sec/million StringContains(16,5000): 2.3345991795 sec/million PatternMatches(16,5000): 172.8726651203 sec/million StringContains(32,5000): 2.2831006681 sec/million PatternMatches(32,5000): 165.0592113295 sec/million StringContains(64,5000): 2.2811933159 sec/million PatternMatches(64,5000): 162.8616774509 sec/million StringContains(128,5000): 2.2935889183 sec/million PatternMatches(128,5000): 168.4775973630 sec/million StringContains(256,5000): 2.3403212363 sec/million PatternMatches(256,5000): 170.7972164571 sec/million StringContains(512,5000): 2.3403212363 sec/million PatternMatches(512,5000): 171.5297277500 sec/million
The first parameter is the sample length ("a", "ab", "abcd" etc); the second is the length of a randomly-built string that, in half of the cases, contains the sample at a random position.
package com.kaching.experimental.users.vlad; import java.util.Random; import java.util.regex.Matcher; import java.util.regex.Pattern; public class PatternMatchingBenchmark { static Random r = new Random(); static String[] randomStrings = new String[100000]; private static void buildRandomStrings(String sample, int textlength) { for (int i = 0; i < randomStrings.length; i++) { randomStrings[i] = newRandomString(sample, textlength); } } private static String newRandomString(String pattern, int stringLength) { int pos = r.nextInt((stringLength - pattern.length())); StringBuilder sb = new StringBuilder(); fillRandomChars(pos, sb); if (r.nextBoolean()) sb.append(pattern); fillRandomChars(stringLength - sb.length(), sb); return sb.toString(); } private static void fillRandomChars(int n, StringBuilder sb) { for (int i = 0; i < n; i++) { sb.append(randomChar()); } } private static char randomChar() { return (char)(32 + r.nextInt(95)); } interface Runner { void run(boolean actually); } static double benchmark(BenchmarkRunner benchmarkRunner, long timeToSpend) { long timeSpent = 0; long numRuns = 0; while (timeSpent < timeToSpend) { numRuns = numRuns * 2 + 1; timeSpent = runloop(benchmarkRunner, numRuns, true); } if (numRuns < 100) { throw new RuntimeException("Benchmark won't run just " + numRuns + " runs"); } runloop(benchmarkRunner, numRuns, false); long dryTime = runloop(benchmarkRunner, numRuns, false); timeSpent = runloop(benchmarkRunner, numRuns, true) - dryTime; return ((double) (timeSpent < (dryTime / 50) ? 0 : timeSpent)) / numRuns * 1000; } private static long runloop(BenchmarkRunner benchmarkRunner, long numRuns, boolean actually) { long now = System.currentTimeMillis(); for (int i = 0; i < numRuns; i++) { benchmarkRunner.run(actually); } if (actually && numRuns > 10 && !benchmarkRunner.found) { throw new RuntimeException("Benchmark " + benchmarkRunner + " failed to find anything"); } return System.currentTimeMillis() - now; } /** * @param args */ static double x = 0; static abstract class BenchmarkRunner { private String sample; int counter = -1; boolean found = false; BenchmarkRunner(String sample) { this.sample = sample; } protected String nextString() { counter = (counter + 1) % randomStrings.length; return randomStrings[counter++]; }; abstract void run(boolean actually); } static BenchmarkRunner stringContains(final String sample, final int textlength) { return new BenchmarkRunner(sample) { @Override public void run(boolean actually) { String s = nextString(); if (actually) { found |= s.contains(sample); } } }; } static BenchmarkRunner patternMatches(final String sample, final int textlength) { final Pattern p = Pattern.compile("^.*" + sample); return new BenchmarkRunner(sample) { @Override public void run(boolean actually) { String s = nextString(); if (actually) { Matcher m = p.matcher(s); found |= m.find(); } } }; } public static void main(String[] args) { for (int n = 1; n < 600; n *= 2) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < n; i++) { sb.append((char) ('a' + i % 26)); } compareBenchmarks(sb.toString(), 5000); } } private static void compareBenchmarks(String sample, int textlength) { buildRandomStrings(sample, textlength); System.out.printf("StringContains(%s,%s): \t %15.10f sec/million\n", sample.length(), textlength, benchmark(stringContains(sample, textlength), 1000)); System.out.printf("PatternMatches(%s,%s): \t %15.10f sec/million\n", sample.length(), textlength, benchmark(patternMatches(sample, textlength), 1000)); } }