正则表达式号称开发者的瑞士军刀,用好正则表达式尤为重要。

拆分多个正则:

/// <summary>
/// Splits <paramref name="text"/> into consecutive parts at the positions where the given
/// patterns match, in array order: part 0 is the text before the first pattern, and part
/// i (i &gt;= 1) starts where <c>subRegexStrings[i - 1]</c> matches and runs up to the next
/// pattern (or the end of the text for the last one).
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexStrings">Regex patterns that must all occur in the text, in this order.</param>
/// <returns>
/// An array of <c>subRegexStrings.Length + 1</c> parts, or a single-element array holding
/// <paramref name="text"/> when the patterns do not all match in order.
/// </returns>
public static string[] SplitByManyRegex(string text, string[] subRegexStrings)
{
    // Build one regex: ^(?<g0>.*?)(?<g1>(?:p1).*?)(?<g2>(?:p2).*?)...$
    string allRegexString = "^(?<mySubGroup0>.*?)";
    for (int i = 0; i < subRegexStrings.Length; i++)
    {
        // The (?:...) wrapper keeps a top-level '|' inside a sub-pattern from
        // swallowing the trailing ".*?" (alternation has the lowest precedence).
        allRegexString += "(?<mySubGroup" + (i + 1) + ">(?:" + subRegexStrings[i] + ").*?)";
    }
    allRegexString += "$";

    Regex subRegex = new Regex(allRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    Match match = subRegex.Match(text);
    if (!match.Success)
    {
        return new string[] { text };
    }

    // Only the start index of each group is needed; each part ends where the next begins.
    int partCount = subRegexStrings.Length + 1;
    int[] positions = new int[partCount];
    for (int m = 0; m < partCount; m++)
    {
        positions[m] = match.Groups["mySubGroup" + m].Index;
    }

    string[] result = new string[partCount];
    for (int i = 0; i < partCount; i++)
    {
        int nextPos = (i < partCount - 1) ? positions[i + 1] : text.Length;
        result[i] = text.Substring(positions[i], nextPos - positions[i]);
    }
    return result;
}

  调用:

// Example tag patterns. SplitByManyRegex matches its patterns in array order, so they are
// listed in the order they are expected to occur in the text.
// NOTE(review): presumably passed as subRegexStrings to SplitByManyRegex; the call itself is not shown here.
string[] tags = { "【答案】", "【解析】" };

  拆分单个正则:

/// <summary>
/// Splits <paramref name="text"/> at every match of a single pattern. The first part is
/// the text before the first match; every following part starts at a match and runs up
/// to the next match (or the end of the text).
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexString">The regex pattern marking the start of each part.</param>
/// <returns>
/// An array with one more element than there are matches, or a single-element array
/// holding <paramref name="text"/> when the pattern does not match.
/// </returns>
public static string[] SplitByRegex(string text, string subRegexString)
{
    Regex splitter = new Regex(subRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    MatchCollection matches = splitter.Matches(text);
    int matchCount = matches.Count;
    if (matchCount == 0)
    {
        return new string[] { text };
    }

    string[] parts = new string[matchCount + 1];

    // Leading part: everything before the first match (may be empty).
    parts[0] = text.Substring(0, matches[0].Index);

    for (int n = 0; n < matchCount; n++)
    {
        int start = matches[n].Index;
        int end = (n + 1 < matchCount) ? matches[n + 1].Index : text.Length;
        parts[n + 1] = text.Substring(start, end - start);
    }

    return parts;
}

  不返回第一条:

/// <summary>
/// Splits <paramref name="text"/> with <c>SplitByRegex</c> and returns the result
/// without its first element.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexString">The regex pattern marking the start of each part.</param>
/// <returns>The parts produced by <c>SplitByRegex</c>, minus the first one.</returns>
public static string[] SplitByRegexNoFirtPart(string text, string subRegexString)
{
    return TrimFirstElementOfArray(SplitByRegex(text, subRegexString));
}

/// <summary>
/// Returns a new array containing every element of <paramref name="ary"/> except the first.
/// </summary>
/// <param name="ary">The source array; may be null or empty.</param>
/// <returns>
/// A new array one element shorter than <paramref name="ary"/>, or an empty array when
/// <paramref name="ary"/> is null or empty.
/// </returns>
private static string[] TrimFirstElementOfArray(string[] ary)
{
    if (ary != null && ary.Length > 0)
    {
        string[] tail = new string[ary.Length - 1];
        int dst = 0;
        while (dst < tail.Length)
        {
            tail[dst] = ary[dst + 1];
            dst++;
        }
        return tail;
    }

    return new string[0];
}

  拆分如:(A(B(C?)?)?)

/// <summary>
/// Splits <paramref name="text"/> like <c>SplitByManyRegex</c>, but trailing patterns may be
/// absent. The combined regex has the nested shape <c>(A(B(C?)?)?)</c>: the first pattern is
/// required, and each later pattern is only tried after the previous one matched.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexStrings">Regex patterns, in the order they are expected in the text.</param>
/// <returns>
/// The text before the first pattern followed by one part per pattern that actually matched,
/// or a single-element array holding <paramref name="text"/> when the combined regex does not match.
/// </returns>
public static string[] SplitByManyRegex_MayLess(string text, string[] subRegexStrings)
{
    string allRegexString = "^(?<mySubGroup0>.*?)";

    for (int i = 0; i < subRegexStrings.Length; i++)
    {
        // The outer "(" is closed by the "?)" appended below. The (?:...) wrapper keeps a
        // top-level '|' inside a sub-pattern from swallowing the trailing ".*?".
        allRegexString += "((?<mySubGroup" + (i + 1) + ">(?:" + subRegexStrings[i] + ").*?)";
    }
    for (int i = 0; i < subRegexStrings.Length; i++)
    {
        // Each "?)" makes the preceding group optional and closes one wrapper level.
        allRegexString += "?)";
    }

    allRegexString += "$";

    Regex subRegex = new Regex(allRegexString, RegexOptions.Singleline | RegexOptions.IgnoreCase);
    Match match = subRegex.Match(text);
    if (!match.Success)
    {
        return new string[] { text };
    }

    // Collect the start index of every group that took part in the match.
    List<int> positions = new List<int>();
    for (int m = 0; m < subRegexStrings.Length + 1; m++)
    {
        Group group = match.Groups["mySubGroup" + m];
        if (group.Success)
        {
            positions.Add(group.Index);
        }
    }

    // Each part runs from its start index to the start of the next part (or the end of the text).
    string[] result = new string[positions.Count];
    for (int i = 0; i < positions.Count; i++)
    {
        int nextPos = (i < positions.Count - 1) ? positions[i + 1] : text.Length;
        result[i] = text.Substring(positions[i], nextPos - positions[i]);
    }
    return result;
}

  可以任意顺序,任意个数:

/// <summary>
/// Splits <paramref name="text"/> at every match of any of the given patterns, which may
/// occur in any order and any number of times.
/// </summary>
/// <param name="text">The text to split.</param>
/// <param name="subRegexStrings">Regex patterns that mark the start of a part.</param>
/// <param name="resultChangeOrder">
/// When false, the parts are returned in text order, exactly as <c>SplitByRegex</c> produced them.
/// When true, the result has <c>subRegexStrings.Length + 1</c> slots: slot 0 is the text
/// before the first match and slot k + 1 holds the part that starts with a match of
/// <c>subRegexStrings[k]</c>. A slot stays null when its pattern was not found; if a pattern
/// starts several parts, the last one wins.
/// </param>
/// <returns>
/// The parts as described above, or a single-element array holding <paramref name="text"/>
/// when <paramref name="text"/> is null/empty or no patterns are given.
/// </returns>
public static string[] SplitByManyRegex_AnyOrder(string text, string[] subRegexStrings, bool resultChangeOrder = true)
{
    if (string.IsNullOrEmpty(text) || subRegexStrings == null || subRegexStrings.Length == 0)
    {
        return new string[] { text };
    }

    string allReg = "(" + string.Join("|", subRegexStrings) + ")";
    string[] result = SplitByRegex(text, allReg);

    if (!resultChangeOrder) return result;

    // Must be the same options SplitByRegex used for the split; otherwise a part that was
    // split on a case-insensitive hit would not be recognised here and would be dropped.
    RegexOptions options = RegexOptions.Singleline | RegexOptions.IgnoreCase;

    string[] ordered = new string[subRegexStrings.Length + 1];
    ordered[0] = result[0];
    for (int i = 1; i < result.Length; i++)
    {
        // Put the part into the slot of the pattern it starts with.
        for (int k = 0; k < subRegexStrings.Length; k++)
        {
            Match head = Regex.Match(result[i], subRegexStrings[k], options);

            // Every part begins at a match, so only a hit at index 0 identifies the pattern
            // that produced it. The first such pattern wins, mirroring the alternation order.
            if (head.Success && head.Index == 0)
            {
                ordered[k + 1] = result[i];
                break;
            }
        }
        // A pattern that starts no part leaves its slot null.
    }
    return ordered;
}

  用正则表达式替换文本中的内容:

/// <summary>
/// Replaces every <c>&lt;handanswer&gt;...&lt;/handanswer&gt;</c> element in
/// <paramref name="html"/> (case-insensitive, may span lines) with replacement text
/// produced by a match evaluator.
/// </summary>
/// <param name="html">The markup to transform.</param>
/// <returns>The markup with every handanswer element replaced.</returns>
public static string TranformHandAnswer(string html)
{
    string strReg = "(?<hand>(<handanswer>(.*?)</handanswer>))"; // pattern for one handanswer element
    Regex regex = new Regex(strReg, RegexOptions.Singleline | RegexOptions.IgnoreCase);

    html = regex.Replace(html, (Match match) =>
    {
        // The matched element is available here for building a context-dependent replacement.
        string handContent = match.Groups["hand"].Value;
        string result = "替换得文本";

        return result;
    });

    return html;
}

  有了以上几个辅助方法,再难的正则拆分都能搞定。