VC网络编程问题:读取网页后,如何将<br />、<span>、</div> 等等无关英文去掉?

2025-04-27 06:59:04
推荐回答(2个)
回答1:

#include <windows.h>
#include <mshtml.h>

// Sample HTML that WinMain parses from memory.
// NOTE(review): the markup of the original sample was lost when this snippet
// was published (only the visible text survived, split across raw newlines,
// which is not a valid string literal). The tags below are a representative
// reconstruction -- replace them with the real page content as needed.
// Kept as ONE literal on one line so that OLESTR's L## prefix applies to the
// whole string on every compiler.
OLECHAR szHTML[] = OLESTR("<html><body><div>百度寻人</div><span>齐心协力,让孩子早日回家</span><br /></body></html>");

/**
 * Entry point: parses the in-memory HTML in szHTML with MSHTML and reads the
 * <body> element's innerText, i.e. the page text without its markup.
 *
 * The extracted text is only fetched and freed here; the marked spot inside
 * the innermost block is where a real program would consume it.
 *
 * All four parameters are unused. The function returns TRUE on every path,
 * exactly as the original sample did.
 */
int __stdcall WinMain(HINSTANCE hInst,
                      HINSTANCE hPrev,
                      LPSTR lpCmdLine,
                      int nShowCmd)
{
    // CoUninitialize() must only balance a successful CoInitialize()
    // (S_OK or S_FALSE); on failure there is nothing to do and nothing to undo.
    if (FAILED(CoInitialize(NULL)))
        return TRUE;

    IHTMLDocument2 *pDoc = NULL;

    CoCreateInstance(CLSID_HTMLDocument,
                     NULL,
                     CLSCTX_INPROC_SERVER,
                     IID_IHTMLDocument2,
                     reinterpret_cast<LPVOID *>(&pDoc));

    if (pDoc)
    {
        IPersistStreamInit *pPersist = NULL;

        pDoc->QueryInterface(IID_IPersistStreamInit,
                             reinterpret_cast<LPVOID *>(&pPersist));

        if (pPersist)
        {
            IMarkupServices *pMS = NULL;

            // Put the freshly created document into its initial state
            // before asking it for markup services.
            pPersist->InitNew();
            pPersist->Release();

            pDoc->QueryInterface(IID_IMarkupServices,
                                 reinterpret_cast<LPVOID *>(&pMS));

            if (pMS)
            {
                IMarkupContainer *pMC = NULL;
                IMarkupPointer *pMkStart = NULL;
                IMarkupPointer *pMkFinish = NULL;

                pMS->CreateMarkupPointer(&pMkStart);
                pMS->CreateMarkupPointer(&pMkFinish);

                // Parse the string into a new markup container; the two
                // pointers are passed to receive the bounds of the result.
                pMS->ParseString(szHTML,
                                 0,
                                 &pMC,
                                 pMkStart,
                                 pMkFinish);

                if (pMC)
                {
                    IHTMLDocument2 *pNewDoc = NULL;

                    // Ask for IHTMLDocument2 explicitly: the pointer is used
                    // as IHTMLDocument2 (get_body), so the IID must match.
                    pMC->QueryInterface(IID_IHTMLDocument2,
                                        reinterpret_cast<LPVOID *>(&pNewDoc));

                    if (pNewDoc)
                    {
                        // do anything with pNewDoc, in this case
                        // get the body innerText.

                        // Initialised so a failed getter cannot leave a
                        // garbage pointer to be tested or released.
                        IHTMLElement *pBody = NULL;
                        pNewDoc->get_body(&pBody);

                        if (pBody)
                        {
                            // NULL-initialised: SysFreeString(NULL) is a
                            // no-op if get_innerText fails.
                            BSTR strText = NULL;

                            pBody->get_innerText(&strText);
                            pBody->Release();

                            // <-- use strText (the tag-free text) here.

                            SysFreeString(strText);
                        }

                        pNewDoc->Release();
                    }

                    pMC->Release();
                }

                if (pMkStart)
                    pMkStart->Release();

                if (pMkFinish)
                    pMkFinish->Release();

                pMS->Release();
            }
        }

        pDoc->Release();
    }

    CoUninitialize();

    return TRUE;
}

回答2:

my god
你编浏览器?