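# Sliding method: move the two strings past each other and, at every offset,
# look for the longest matching run inside the overlapping part.
# start (last character of s1 over the first character of s2):
#   s1 aaaaaaaa
#   s2        bbbbbbbb
# end (first character of s1 over the last character of s2):
#   s1        aaaaaaaa
#   s2 bbbbbbbb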
def taiLongestSubstring(s1, s2):
    """Return the longest run of matching positions of two left-aligned strings.

    The strings are compared index by index over the length of the shorter
    one; the result is the longest stretch where both hold equal characters
    at the same index. When several stretches share the maximum length the
    first one wins. The returned value is a slice of the shorter argument
    (of ``s1`` when both have the same length), or an empty slice when no
    position matches.
    """
    # Make s1 the shorter operand:   s1  aaaaaa
    #                                s2  bbbbbbbbb
    if len(s2) < len(s1):
        s1, s2 = s2, s1

    best_start = 0
    best_len = 0
    run_len = 0
    # zip stops at the end of the shorter string, i.e. the overlap.
    for pos, (left, right) in enumerate(zip(s1, s2)):
        if left != right:
            run_len = 0
            continue
        run_len += 1
        # Strict comparison keeps the earliest run among equally long ones.
        if run_len > best_len:
            best_len = run_len
            best_start = pos - run_len + 1
    return s1[best_start:best_start + best_len]
def comLongestSubstring(s1, s2):
    """Return the longest common substring of s1 and s2 by sliding alignment.

    Every relative offset of the two strings is tried: first growing
    suffixes of ``s1`` are left-aligned with ``s2``, then ``s1`` is
    left-aligned with ever shorter suffixes of ``s2``. For each alignment
    ``taiLongestSubstring`` supplies the longest position-wise matching run,
    and the longest run over all alignments is returned (the first one found
    on ties, ``''`` when nothing matches).
    """
    # Phase 1: suffixes of s1 (length 1 .. len(s1)) against the whole of s2.
    s1_tails = ((s1[-size:], s2) for size in range(1, len(s1) + 1))
    # Phase 2: the whole of s1 against suffixes of s2 (offset 1 .. len(s2)-1).
    s2_tails = ((s1, s2[shift:]) for shift in range(1, len(s2)))

    best = ''
    for alignments in (s1_tails, s2_tails):
        for left, right in alignments:
            candidate = taiLongestSubstring(left, right)
            if len(candidate) > len(best):
                best = candidate
    return best
# Set method: build the set of all substrings of each string and intersect them.
def setSubstring(s):
    """Return the set of all non-empty contiguous substrings of ``s``.

    An empty ``s`` yields an empty set.
    """
    length = len(s)
    return {
        s[begin:end]
        for begin in range(length)
        for end in range(begin + 1, length + 1)
    }
def setLongestSubstring(s1, s2):
    """Return a longest substring shared by ``s1`` and ``s2``.

    The substring sets of both inputs (built by ``setSubstring``) are
    intersected and the longest member is returned, or ``''`` when the
    intersection is empty. When several shared substrings have the maximum
    length, the one met first while iterating the set is returned, so the
    choice among them follows set iteration order.
    """
    shared = setSubstring(s1) & setSubstring(s2)
    # max() keeps the first maximal item it meets, like a strict '>' scan.
    return max(shared, key=len, default='')
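# Similar to the previous method: take the substrings of the shorter string,
# longest first, and search for each one in the other string.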
def LongestSubstring2(s1, s2):
    """Return the longest common substring of ``s1`` and ``s2``.

    Candidate substrings are taken from the shorter input (from ``s1`` when
    both have the same length), longest first and left to right within one
    length; the first candidate that occurs in the other input is returned.
    Returns ``''`` when the inputs share no character.
    """
    source, target = (s2, s1) if len(s1) > len(s2) else (s1, s2)
    total = len(source)
    for size in reversed(range(1, total + 1)):
        for begin in range(total - size + 1):
            piece = source[begin:begin + size]
            if piece in target:
                return piece
    return ''
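# Matrix method: compare every element of s2 with every element of s1 and
# record the result in a matrix: 0 when they differ, the value of the
# upper-left neighbour plus 1 when they are equal. Track the length of the
# longest run and the index at which it ends.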
def LongestSubstring3(s1, s2):
    """Return the longest common substring of ``s1`` and ``s2`` (matrix method).

    Conceptually a ``len(s2) x len(s1)`` matrix is filled in which cell
    ``(i, j)`` is 0 when ``s2[i] != s1[j]`` and otherwise one more than the
    upper-left neighbour ``(i-1, j-1)``, i.e. the length of the common run
    ending at those two positions. Each row depends only on the row above
    it, so just the previous and the current row are kept instead of the
    whole matrix.

    Args:
        s1: Sequence the result is sliced from.
        s2: Sequence compared against ``s1``.

    Returns:
        The slice of ``s1`` holding the longest common run; an empty slice
        when the inputs have nothing in common. On ties the run found first
        while scanning ``s2`` in the outer loop and ``s1`` in the inner loop
        is returned.
    """
    width = len(s1)
    best_len = 0
    best_end = 0  # index in s1 of the last element of the best run
    above = [0] * width  # row for the previous element of s2
    for x in s2:
        current = [0] * width
        for j, y in enumerate(s1):
            if x != y:
                continue
            # In the first column there is no upper-left neighbour; in the
            # first row 'above' is all zeros, so both cases start a run at 1.
            run = above[j - 1] + 1 if j else 1
            current[j] = run
            if run > best_len:
                best_len = run
                best_end = j
        above = current
    return s1[best_end + 1 - best_len:best_end + 1]