我已经用过很多种方式来判断了,但效果都不怎么样?在未知网页的编码方式时,请问怎么才可以正常下载到网页,而不是乱码呢?请大家指教下
#region【获取网页HTML文本】 /// <summary> /// 获取url网页的HTML文本信息 /// </summary> /// <param name="url">网页URL</param> /// <param name="codeType">编码方式</param> /// <returns>返回HTML文本字符串</returns> public static String GetResponseText(string url, string codeType) { string responseFromServer = null; Stream dataStream = null; StreamReader reader = null; try { WebRequest request = WebRequest.Create(url); request.Credentials = CredentialCache.DefaultCredentials; HttpWebResponse response = (HttpWebResponse)request.GetResponse(); if (response.StatusDescription == "OK") { try { dataStream = response.GetResponseStream(); reader = new StreamReader(dataStream, GetPageEncoding(url)); responseFromServer = reader.ReadToEnd(); Regex rex = new Regex(@"(?<=charset\s*=\s*)[^""]*?(?="")", RegexOptions.IgnoreCase); string charset = rex.Match(responseFromServer, 0).Value; if (!charset.Equals("utf-8")) //如果编码方式不是utf-8的话,则重新用默认方式下载网页 { reader = new StreamReader(dataStream, Encoding.Default); responseFromServer = reader.ReadToEnd(); } } finally { reader.Close(); dataStream.Close(); } } response.Close(); return responseFromServer; } catch (Exception ex) { return ex.Message; } } #endregion