A UTF-8 encoded file may begin with a BOM (byte order mark: the three bytes EF BB BF). Notepad does not display it when you open the file, but it shows up as a stray character at the start of the content when you read the file through a FileStream.
I’ve created a function that removes the BOM from every file in a directory that matches a given file pattern:
/// <summary>
/// Strips the UTF-8 byte order mark (EF BB BF) from every file in
/// <paramref name="directory"/> whose name matches <paramref name="filePattern"/>.
/// Files that do not start with the BOM are left untouched.
/// </summary>
/// <param name="filePattern">Search pattern passed to <c>Directory.GetFiles</c>, e.g. <c>*.html</c>.</param>
/// <param name="directory">Directory whose files are examined.</param>
/// <param name="recursive">
/// When <c>true</c>, files in all sub-directories are processed as well.
/// Defaults to <c>false</c>, which only looks at <paramref name="directory"/> itself.
/// </param>
/// <remarks>
/// Any exception raised while enumerating, reading or writing files is caught and
/// shown to the user in a message box; files already rewritten stay rewritten.
/// </remarks>
private void removeBoms(string filePattern, string directory, bool recursive = false)
{
    // Length of the UTF-8 BOM in bytes.
    const int bomLength = 3;

    try
    {
        // A single enumeration covers the recursive case too, so the completion
        // message below is shown exactly once regardless of directory depth.
        SearchOption scope = recursive
            ? SearchOption.AllDirectories
            : SearchOption.TopDirectoryOnly;

        foreach (string filename in Directory.GetFiles(directory, filePattern, scope))
        {
            byte[] bytes = System.IO.File.ReadAllBytes(filename);

            bool startsWithBom = bytes.Length >= bomLength
                && bytes[0] == 0xEF
                && bytes[1] == 0xBB
                && bytes[2] == 0xBF;
            if (!startsWithBom)
            {
                continue;
            }

            // Copy everything after the BOM and overwrite the file with it.
            byte[] withoutBom = new byte[bytes.Length - bomLength];
            Buffer.BlockCopy(bytes, bomLength, withoutBom, 0, withoutBom.Length);
            System.IO.File.WriteAllBytes(filename, withoutBom);
        }

        MessageBox.Show("Files have been processed completely!");
    }
    catch (Exception ex)
    {
        MessageBox.Show(ex.ToString());
    }
}
Here is how to call it from a button click handler:
/// <summary>
/// Click handler for the "process" button: validates the extension text box and the
/// selected folder, then removes the UTF-8 BOM from the matching files in that folder.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void btnProcess_Click(object sender, EventArgs e)
{
    // Trim first so that stray spaces neither pass validation nor end up in the pattern.
    string extension = txtExtensions.Text.Trim();
    if (extension.Length == 0)
    {
        txtExtensions.Focus();
        MessageBox.Show("Please specify the extension files");
        return;
    }

    // Both inputs are required; processing must not start with only one of them set.
    if (folderBrowserDialog1.SelectedPath.Trim().Length == 0)
    {
        MessageBox.Show("Please select target folder");
        return;
    }

    // Normalise the user input to a "*.ext" search pattern:
    // "html" -> "*.html", ".html" -> "*.html", "*.html" stays as it is.
    if (extension.StartsWith(".", StringComparison.Ordinal))
    {
        extension = "*" + extension;
    }
    else if (!extension.StartsWith("*.", StringComparison.Ordinal))
    {
        extension = "*." + extension;
    }

    // Process only the files matching the requested extension.
    removeBoms(extension, folderBrowserDialog1.SelectedPath);
}