I will show you three different methods to remove HTML tags from a string in C#:
1. By using Regex:
/// <summary>
/// Removes every <c>&lt;...&gt;</c> span from <paramref name="html"/> using a regular expression.
/// </summary>
/// <param name="html">The text to strip tags from.</param>
/// <returns>
/// The input with each shortest run from a <c>&lt;</c> to the next <c>&gt;</c> removed.
/// A <c>&lt;</c> that is never closed by a <c>&gt;</c> is left in place, and character
/// entities such as <c>&amp;amp;</c> are not decoded.
/// </returns>
public static string RemoveHTMLTags(string html)
{
    // Singleline lets '.' match '\n', so a tag whose attributes wrap onto the
    // next line (e.g. "<a\nhref=...>") is removed as well instead of being left behind.
    return Regex.Replace(html, "<.*?>", string.Empty, RegexOptions.Singleline);
}
2. By using Compiled Regex for better performance:
// Built once and reused by every call. RegexOptions.Compiled trades a slower first
// construction for faster matching; Singleline lets '.' match '\n' so tags that span
// several lines are matched too.
private static readonly Regex htmlRegex = new Regex("<.*?>", RegexOptions.Compiled | RegexOptions.Singleline);

/// <summary>
/// Removes every <c>&lt;...&gt;</c> span from <paramref name="html"/> using the cached,
/// compiled regular expression.
/// </summary>
/// <param name="html">The text to strip tags from.</param>
/// <returns>
/// The input with each shortest run from a <c>&lt;</c> to the next <c>&gt;</c> removed.
/// A <c>&lt;</c> that is never closed by a <c>&gt;</c> is left in place.
/// </returns>
public static string RemoveHTMLTagsCompiled(string html)
{
    return htmlRegex.Replace(html, string.Empty);
}
3. By using a char array, which is faster when processing many HTML files:
/// <summary>
/// Strips tags in a single pass by copying only the characters that lie outside
/// angle brackets into a scratch buffer.
/// </summary>
/// <param name="html">The text to scan. It is dereferenced immediately, so it must not be null.</param>
/// <returns>
/// The kept characters in their original order. Every <c>&lt;</c> and <c>&gt;</c> is
/// dropped, and everything after an unclosed <c>&lt;</c> is dropped as well.
/// </returns>
public static string RemoveHTMLTagsCharArray(string html)
{
    // The result can never be longer than the input, so one input-sized buffer is enough.
    char[] buffer = new char[html.Length];
    int written = 0;
    bool insideTag = false;

    foreach (char current in html)
    {
        switch (current)
        {
            case '<':
                // Start skipping; the bracket itself is not copied.
                insideTag = true;
                break;

            case '>':
                // Stop skipping; the bracket itself is not copied.
                insideTag = false;
                break;

            default:
                if (!insideTag)
                {
                    buffer[written] = current;
                    written++;
                }
                break;
        }
    }

    // Only the first 'written' slots hold output; the rest of the buffer is unused.
    return new string(buffer, 0, written);
}
Source: https://www.nilebits.com/blog/2010/01/how-to-remove-html-tags-from-string-in-c/