| 网站首页 | 业界新闻 | 小组 | 威客 | 人才 | 下载频道 | 博客 | 代码贴 | 在线编程 | 编程论坛
欢迎加入我们,一同切磋技术
用户名:   
 
密 码:  
共有 466 人关注过本帖
标题:求助 正则表达式获取网页编码(gb2312)失败
只看楼主 加入收藏
追梦人zmrghy
Rank: 3Rank: 3
等 级:论坛游侠
帖 子:406
专家分:190
注 册:2021-4-9
结帖率:97.26%
收藏
已结贴  问题点数:20 回复次数:2 
求助 正则表达式获取网页编码(gb2312)失败
图片附件: 游客没有浏览图片的权限,请 登录注册


汉字是乱码,想在代码中获取gb2312


图片附件: 游客没有浏览图片的权限,请 登录注册


RegexBuddy 4 测试正则表达式 ".*?charset[=](.*?)[\"]>.*" ,显示正确。。。




图片附件: 游客没有浏览图片的权限,请 登录注册


实际应用是,获取 "gb2312" 失败,是什么原因。


程序代码:
#pragma once


namespace GetWebpageCode {

    using namespace System;
    using namespace System::ComponentModel;
    using namespace System::Collections;
    using namespace System::Windows::Forms;
    using namespace System::Data;
    using namespace System::Drawing;
    using namespace System::Net;
    using namespace System::Text;
    using namespace System::Text::RegularExpressions;

    /// <summary>
    /// Form1 摘要
    /// </summary>
    public ref class Form1 : public System::Windows::Forms::Form
    {
    public:
        Form1(void)
        {
            InitializeComponent();
            //
            //TODO:  在此处添加构造函数代码
            //
        }

    protected:
        /// <summary>
        /// 清理所有正在使用的资源。
        /// </summary>
        ~Form1()
        {
            if (components)
            {
                delete components;
            }
        }
    private: System::Windows::Forms::Button^ button1;
    protected:
    private: System::Windows::Forms::Button^ button2;
    private: System::Windows::Forms::Button^ button3;
    private: System::Windows::Forms::Button^ button4;
    private: System::Windows::Forms::Button^ button5;
    private: System::Windows::Forms::Button^ button6;
    private: System::Windows::Forms::TextBox^ textBox1;
    private: System::Windows::Forms::TextBox^ textBox2;

    private:
        /// <summary>
        /// 必需的设计器变量。
        /// </summary>
        System::ComponentModel::Container ^components;

#pragma region Windows Form Designer generated code
        /// <summary>
        /// 设计器支持所需的方法 - 不要修改
        /// 使用代码编辑器修改此方法的内容。
        /// </summary>
        void InitializeComponent(void)
        {
            this->button1 = (gcnew System::Windows::Forms::Button());
            this->button2 = (gcnew System::Windows::Forms::Button());
            this->button3 = (gcnew System::Windows::Forms::Button());
            this->button4 = (gcnew System::Windows::Forms::Button());
            this->button5 = (gcnew System::Windows::Forms::Button());
            this->button6 = (gcnew System::Windows::Forms::Button());
            this->textBox1 = (gcnew System::Windows::Forms::TextBox());
            this->textBox2 = (gcnew System::Windows::Forms::TextBox());
            this->SuspendLayout();
            // 
            // button1
            // 
            this->button1->Font = (gcnew System::Drawing::Font(L"宋体", 12));
            this->button1->Location = System::Drawing::Point(33, 53);
            this->button1->Name = L"button1";
            this->button1->Size = System::Drawing::Size(100, 35);
            this->button1->TabIndex = 0;
            this->button1->Text = L"button1";
            this->button1->UseVisualStyleBackColor = true;
            this->button1->Click += gcnew System::EventHandler(this, &Form1::button1_Click);
            // 
            // button2
            // 
            this->button2->Font = (gcnew System::Drawing::Font(L"宋体", 12));
            this->button2->Location = System::Drawing::Point(196, 53);
            this->button2->Name = L"button2";
            this->button2->Size = System::Drawing::Size(100, 35);
            this->button2->TabIndex = 1;
            this->button2->Text = L"button2";
            this->button2->UseVisualStyleBackColor = true;
            // 
            // button3
            // 
            this->button3->Font = (gcnew System::Drawing::Font(L"宋体", 12));
            this->button3->Location = System::Drawing::Point(359, 53);
            this->button3->Name = L"button3";
            this->button3->Size = System::Drawing::Size(100, 35);
            this->button3->TabIndex = 2;
            this->button3->Text = L"button3";
            this->button3->UseVisualStyleBackColor = true;
            // 
            // button4
            // 
            this->button4->Font = (gcnew System::Drawing::Font(L"宋体", 12));
            this->button4->Location = System::Drawing::Point(522, 53);
            this->button4->Name = L"button4";
            this->button4->Size = System::Drawing::Size(100, 35);
            this->button4->TabIndex = 3;
            this->button4->Text = L"button4";
            this->button4->UseVisualStyleBackColor = true;
            // 
            // button5
            // 
            this->button5->Font = (gcnew System::Drawing::Font(L"宋体", 12));
            this->button5->Location = System::Drawing::Point(685, 53);
            this->button5->Name = L"button5";
            this->button5->Size = System::Drawing::Size(100, 35);
            this->button5->TabIndex = 4;
            this->button5->Text = L"button5";
            this->button5->UseVisualStyleBackColor = true;
            // 
            // button6
            // 
            this->button6->Font = (gcnew System::Drawing::Font(L"宋体", 12));
            this->button6->Location = System::Drawing::Point(848, 53);
            this->button6->Name = L"button6";
            this->button6->Size = System::Drawing::Size(100, 35);
            this->button6->TabIndex = 5;
            this->button6->Text = L"button6";
            this->button6->UseVisualStyleBackColor = true;
            // 
            // textBox1
            // 
            this->textBox1->Font = (gcnew System::Drawing::Font(L"宋体", 12));
            this->textBox1->Location = System::Drawing::Point(32, 12);
            this->textBox1->Name = L"textBox1";
            this->textBox1->Size = System::Drawing::Size(915, 26);
            this->textBox1->TabIndex = 6;
            // 
            // textBox2
            // 
            this->textBox2->Location = System::Drawing::Point(0, 104);
            this->textBox2->Multiline = true;
            this->textBox2->Name = L"textBox2";
            this->textBox2->ScrollBars = System::Windows::Forms::ScrollBars::Vertical;
            this->textBox2->Size = System::Drawing::Size(1007, 536);
            this->textBox2->TabIndex = 7;
            // 
            // Form1
            // 
            this->AutoScaleDimensions = System::Drawing::SizeF(6, 12);
            this->AutoScaleMode = System::Windows::Forms::AutoScaleMode::Font;
            this->ClientSize = System::Drawing::Size(1008, 641);
            this->Controls->Add(this->textBox2);
            this->Controls->Add(this->textBox1);
            this->Controls->Add(this->button6);
            this->Controls->Add(this->button5);
            this->Controls->Add(this->button4);
            this->Controls->Add(this->button3);
            this->Controls->Add(this->button2);
            this->Controls->Add(this->button1);
            this->Name = L"Form1";
            this->Text = L"Form1";
            this->ResumeLayout(false);
            this->PerformLayout();

        }
#pragma endregion
    private:String^ WebPageCode;
    private:System::Void button1_Click(System::Object^ sender, System::EventArgs^ e)
    {
        WebClient^ myWebClient = gcnew WebClient;

        array<Byte>^ mybuffer = myWebClient->DownloadData(textBox1->Text);
        //WebPageCode = Encoding::GetEncoding(936)->GetString(mybuffer);
        //UTF8Encoding temp;
        ASCIIEncoding temp;
        WebPageCode=temp.GetString(mybuffer);
        Regex^ charcode=gcnew Regex(".*?charset[=](.*?)[\"] > .*");
        String^ StrCode = charcode->Replace(WebPageCode, "$1");
        WebPageCode = Encoding::GetEncoding(StrCode)->GetString(mybuffer);
        textBox2->Text = WebPageCode;
    }
};
}
搜索更多相关主题的帖子: Windows Forms Button Size System 
2023-03-04 03:03
阳光上的桥
Rank: 16Rank: 16Rank: 16Rank: 16
等 级:版主
威 望:38
帖 子:129
专家分:772
注 册:2023-1-12
收藏
得分:20 
图片附件: 游客没有浏览图片的权限,请 登录注册
2023-03-09 17:27
追梦人zmrghy
Rank: 3Rank: 3
等 级:论坛游侠
帖 子:406
专家分:190
注 册:2021-4-9
收藏
得分:0 
回复 2楼 阳光上的桥
表达式书写错误了,不是主要问题。
WebClient^ myWebClient = gcnew WebClient;
    array<Byte>^ mybuffer = myWebClient->DownloadData(textBox1->Text);
    ASCIIEncoding temp;
    String^ WebPageCode = temp.GetString(mybuffer);

 WebPageCode里面有很多个字符串,正则匹配的内容是从 WebPageCode 开始到时“\r\n”回车换行就结束了,不再匹配后面内容。。。。
弄了几天终于想到办法了。。。
程序代码:
private:System::Void button1_Click(System::Object^ sender, System::EventArgs^ e)
{
    WebClient^ myWebClient = gcnew WebClient;
    array<Byte>^ mybuffer = myWebClient->DownloadData(textBox1->Text);
    ASCIIEncoding temp;
    String^ WebPageCode = temp.GetString(mybuffer);
    String^ StrCode = GetCharacterEncoding(WebPageCode);
    WebPageCode = Encoding::GetEncoding(StrCode)->GetString(mybuffer);
    textBox2->Text = WebPageCode;
}
private: String^ GetCharacterEncoding(String^ Str)
{
    Regex^ Rex1 = gcnew Regex(".*?charset[=](.*?)[\"].*");
    array<String^>^ StrArr1 = Regex::Split(Str, "\r\n");
    for each (auto i in StrArr1)
    {
        if (Rex1->IsMatch(i))
        {
            return Rex1->Replace(i, "$1");
        }
    }
    return nullptr;
}

图片附件: 游客没有浏览图片的权限,请 登录注册


[此贴子已经被作者于2023-3-11 00:24编辑过]

2023-03-11 00:22
快速回复:求助 正则表达式获取网页编码(gb2312)失败
数据加载中...
 
   



关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.023101 second(s), 10 queries.
Copyright©2004-2024, BCCN.NET, All Rights Reserved