def naive_string_match(s, p): print "searching for " + p + " in " + s for i in range(len(s) - len(p) + 1): for j in range(len(p)): if s[i + j] != p[j]: break if j == len(p) - 1: return i return -1 def prefix(s): pre = [0] i = 1 matches = 0 while i < len(s): print i, matches, s[i], s[matches] if s[i] == s[matches]: matches += 1 pre.append(matches) print pre[-1] i += 1 elif matches > 0: matches = pre[matches - 1] else: pre.append(0) print pre[-1] i += 1 return pre def knuth_morris_pratt_match(s, p): pre = prefix(p) i = 0 matches = 0 while i < len(s): if s[i] == p[matches]: matches += 1 if matches == len(p): return i + 1 - len(p) else: i += 1 elif matches > 0: matches = pre[matches - 1] else: i += 1 return -1 def hash(s): p = 65521 r = 65536 % p h = 0 n = len(s) - 1 for c in s: newr = 1 for i in range(n): newr = (newr * (r % p)) % p h += ord(c) * newr n -= 1 return h def rehash(old, n, new, plen): p = 65521 r = 65536 % p newr = 1 for i in range(plen - 1): newr = (newr * (r % p)) % p return (((n - old * newr) % p) * r) % p + new def rabin_karp_match(s, p): h = hash(p) print h for i in range(len(s) - len(p) + 1): if i == 0: h2 = hash(s[:len(p)]) else : h2 = rehash(ord(s[i-1]), h2, ord(s[i+len(p)-1]), len(p)) print h2 if h == h2: m = naive_string_match(p, s[i:i+len(p)]) if m != -1: return i return -1 def main(): print naive_string_match("TACHATWESHATLKEKWEHAT", "HAT") print rabin_karp_match("TACHATWESHATLKEKWEHAT", "HAT") print knuth_morris_pratt_match("TACHATWESHATLKEKWEHAT", "HAT") main()