using System.Text.RegularExpressions; //正则 string strHtml = " /ul div id=\"photo-tags\" h3 id=\"tags-title\"Tags in this photo: /h3 ul id='tags' li a href='/user/669345/tags/Belgium'Belgium /a /li li a href='/user/669345/tags/Belgien'Be
using System.Text.RegularExpressions; //正则
// Sample HTML fragment containing a list of photo tags.
// A verbatim string (@"...") is used because a regular string literal cannot
// span several source lines; embedded double quotes are written as "".
string strHtml = @" </ul> <div id=""photo-tags""> <h3 id=""tags-title"">Tags in this photo: </h3>
<ul id='tags'>
<li> <a href='/user/669345/tags/Belgium'>Belgium </a> </li>
<li> <a href='/user/669345/tags/Belgien'>Belgien </a> </li>
<li> <a href='/user/669345/tags/Urlaub'>Urlaub </a> </li>
<li> <a href='/user/669345/tags/Holidays'>Holidays </a> </li>
<li> <a href='/user/669345/tags/Vakanties'>Vakanties </a> </li>
<li> <a href='/user/669345/tags/CenterParcs'>CenterParcs </a> </li>
<li> <a href='/user/669345/tags/10 000 000'>10 000 000 </a> </li>
</ul> </div> <div id=""photo-info"">";

// Show the raw input.
TextBox1.Text = strHtml;

// The lookbehind anchors on the opening "<li> <a href='/user/.../tags/...'>" and
// the lookahead on the closing " </a> </li>", so each match is only the tag text.
Regex re = new Regex("(?<=<li> <a href='/user/[^>]*/tags/[^>]*>).*?(?= </a> </li>)");

// Scan the input once; an empty collection means nothing matched.
MatchCollection mc = re.Matches(strHtml);
if (mc.Count > 0)
{
    // Start from the current text so the output is still appended to whatever
    // TextBox2 already shows, but assign the control property only once.
    System.Text.StringBuilder output = new System.Text.StringBuilder(TextBox2.Text);
    foreach (Match ma in mc)
    {
        // The pattern has no capture groups, so the whole match (group 0)
        // is the only value to emit; one tag per line, followed by a space.
        output.Append(ma.Value).Append(" ").Append("\n");
    }
    TextBox2.Text = output.ToString();
}
else
{
    TextBox2.Text = "no";
}
// Result (text appended to TextBox2 by the snippet above):
// Belgium
// Belgien
// Urlaub
// Holidays
// Vakanties
// CenterParcs
// 10 000 000
// Matches an <img> tag (case-insensitively) and captures the value of its src
// attribute - single-quoted, double-quoted or unquoted - in the "src" group.
// Built once and reused instead of being re-created on every call.
private static readonly Regex FirstImageSrcRegex = new Regex(
    @"<IMG[^>]+src=\s*(?:'(?<src>[^']+)'|""(?<src>[^""]+)""|(?<src>[^>\s]+))\s*[^>]*>",
    RegexOptions.IgnoreCase);

/// <summary>
/// Gets the first image referenced in the article content, for use as the
/// article thumbnail.
/// </summary>
/// <param name="articleContent">HTML content of the article. May be null or empty.</param>
/// <returns>
/// The lower-cased <c>src</c> value of the first <c>&lt;img&gt;</c> tag, or an
/// empty string when the content is null, empty, or contains no image tag.
/// </returns>
public static string GetImageUrlFromArticle(string articleContent)
{
    // Regex.Match throws on null input; treat "no content" as "no image".
    if (string.IsNullOrEmpty(articleContent))
    {
        return "";
    }

    // Only the first image is needed, so stop at the first match instead of
    // enumerating every image in the article.
    Match firstImage = FirstImageSrcRegex.Match(articleContent);
    if (!firstImage.Success)
    {
        return "";
    }

    // Lower-casing is kept for backward compatibility with existing callers,
    // but done culture-invariantly so the URL is not corrupted under cultures
    // with special casing rules (e.g. Turkish dotless i).
    // NOTE(review): URL paths can be case-sensitive - confirm callers really
    // want the lower-cased form.
    return firstImage.Groups["src"].Value.ToLowerInvariant();
}