ȸ»ç ÀÏ ¶§¹®¿¡ ³×À̹ö¿¡¼ »çÀü µ¥ÀÌÅ͸¦ ±Ü¾î¿À°Ô µÇ¾ú´Ù. HTML Çü½Äµµ ´ë° ´Ù ºñ½ÁÇϰí ÇØ¼ ÇϳªÀÇ ÇÁ·Î±×·¥À¸·Î ÇÑ¿µ,¿µÇÑ,ÇÑÀÏ,ÀÏÇÑ,ÇÑÇÑ »çÀü µîÀ» ¸ðµÎ ±Ü¾î¿Ã ¼ö ÀÖ¾ú´Ù. ¾Æ·¡´Â ±× ÇÁ·Î±×·¥ÀÇ ¼Ò½º´Ù. ´ëºÎºÐÀÇ ¼Ò½º´Â RetrievingFileUsingHttp ÆäÀÌÁö¿¡ ÀÖ´Â ¼Ò½º¿Í ¶È°°Àº ¼Ò½º´Ù. ´Ù¸¸ Àü¼ÛµÈ HTML¿¡¼ ÀÌ·±Àú·± ű׸¦ ¾ø¾ÖÁÖ´Â ºÎºÐÀÌ ³ªÁß¿¡ ¶Ç ©·Á¸é ±ÍÂúÀ» °Í °°¾Æ¼ ÀÌ·¸°Ô ³²°ÜµÐ´Ù.
#include "MTypes.h"
#include <conio.h>
#include <stdio.h>
#include <fcntl.h>
#include <io.h>
#include <winsock.h>
#include <fstream>
#include "MUtil.h"
void GetHTTP(LPCSTR lpServerName, LPCSTR lpFileName, string& text);
// Helper macro for displaying errors.
// Writes "\n<s>: <code>\n" to stderr, where <s> is the C string naming the
// call that failed and <code> is the value returned by WSAGetLastError().
// The macro expands to the fprintf() call expression itself.
#define PRINTERROR(s) \
    fprintf(stderr, "\n%s: %d\n", (s), WSAGetLastError())
void main(int argc, char **argv)
{
WORD wVersionRequested = MAKEWORD(1,1);
WSADATA wsaData;
int nRet;
if (argc != 3)
{
cerr << "Usage: " << argv[0] << " START_INDEX END_INDEX" << endl;
return;
}
int start_index = atoi(argv[1]);
int end_index = atoi(argv[2]);
if (start_index > end_index)
{
cout << "END_INDEX must be larger than START_INDEX!" << endl;
return;
}
nRet = WSAStartup(wVersionRequested, &wsaData);
if (nRet)
{
fprintf(stderr,"\nWSAStartup(): %d\n", nRet);
WSACleanup();
return;
}
if (wsaData.wVersion != wVersionRequested)
{
fprintf(stderr,"\nWinSock version not supported\n");
WSACleanup();
return;
}
char buf[1024] = {0, };
string text;
string title;
sprintf(buf, "download_%d_%d.txt", start_index, end_index);
ofstream file(buf, ios::out | ios::trunc);
//string token1 = "<a class=hb";
//string token2 = "</small>";
string token1 = "<!-- content -->";
string token2 = "<!-- end of content -->";
size_t begin = 0;
size_t end = 0;
text.reserve(1024*100);
for (int i=start_index; i<=end_index; i++)
{
memset(buf, 0, 1024);
sprintf(buf, "/endic.php?docid=%d", i);
//http://endic.naver.com/endic.php?docid=135905
GetHTTP("endic.naver.com", buf, text);
begin = text.find("<title>", 0);
end = text.find("</title>", begin);
if (begin < end && end != string::npos)
{
title = text.substr(begin + 7 + 25, end - begin - 7 - 25 - 1);
}
begin = text.find(token1, 0);
end = text.find(token2, begin);
if (begin < end && end != string::npos)
{
file << i << " =================================================="
<< endl << title << " | ";
text = text.substr(begin, end - begin + token2.size());
//file << text.substr(begin, end - begin + 8) << endl;
size_t i = 0;
size_t j = 0;
size_t k = 0;
// k
// i j i j
// 0123456789012345678901234567890123456789
// <a href="...">hm</a><a href="...">...</a>
while (i < text.size() && j < text.size())
{
i = text.find_first_of('<', k);
if (i == string::npos) break;
j = text.find_first_of('>', i);
if (j == string::npos) break;
if (k < i)
{
string subtext = text.substr(k, i-k);
if (!subtext.empty()) file << subtext;
}
k = j + 1;
}
file << endl;
}
cout << i << endl;
}
WSACleanup();
}
// Fetches a document over HTTP and stores the raw response in `text`.
//
//   lpServerName  host name or dotted-decimal IP address of the server
//   lpFileName    request path, sent verbatim as "GET <path>\n"
//   text          receives every byte read until the server closes the
//                 connection; it is emptied first, so it is empty whenever
//                 resolving, connecting or sending fails
//
// Errors are reported on stderr through PRINTERROR; nothing is returned or
// thrown. If recv() fails part-way, the bytes received so far stay in `text`.
void GetHTTP(LPCSTR lpServerName, LPCSTR lpFileName, string& text)
{
    // Clear up front so that no early return leaves a previous page behind.
    text = "";

    // Resolve the server: try a numeric address first, then fall back to a
    // name lookup.
    IN_ADDR iaHost;
    iaHost.s_addr = inet_addr(lpServerName);
    LPHOSTENT lpHostEntry;
    if (iaHost.s_addr == INADDR_NONE)
    {
        // Wasn't an IP address string, assume it is a name
        lpHostEntry = gethostbyname(lpServerName);
    }
    else
    {
        // It was a valid IP address string
        lpHostEntry = gethostbyaddr((const char *)&iaHost,
                                    sizeof(struct in_addr), AF_INET);
    }
    if (lpHostEntry == NULL)
    {
        PRINTERROR("gethostbyname()");
        return;
    }

    SOCKET Socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
    if (Socket == INVALID_SOCKET)
    {
        PRINTERROR("socket()");
        return;
    }

    // Zero the whole address structure so its padding bytes are defined.
    SOCKADDR_IN saServer;
    memset(&saServer, 0, sizeof(saServer));
    saServer.sin_family = AF_INET;
    saServer.sin_addr = *((LPIN_ADDR)*lpHostEntry->h_addr_list);

    // Use the registered "http" service port, or 80 when it is not listed.
    LPSERVENT lpServEnt = getservbyname("http", "tcp");
    if (lpServEnt == NULL)
        saServer.sin_port = htons(80);
    else
        saServer.sin_port = lpServEnt->s_port;

    int nRet = connect(Socket, (LPSOCKADDR)&saServer, sizeof(SOCKADDR_IN));
    if (nRet == SOCKET_ERROR)
    {
        PRINTERROR("connect()");
        closesocket(Socket);
        return;
    }

    // Request line only (no HTTP version, no headers), exactly as before.
    string request = "GET ";
    request += lpFileName;
    request += "\n";

    // send() may accept fewer bytes than requested; keep going until the
    // whole request has been handed to the stack.
    const char* pending = request.c_str();
    int remaining = static_cast<int>(request.size());
    while (remaining > 0)
    {
        nRet = send(Socket, pending, remaining, 0);
        if (nRet == SOCKET_ERROR)
        {
            PRINTERROR("send()");
            closesocket(Socket);
            return;
        }
        pending += nRet;
        remaining -= nRet;
    }

    // Kept static so the 100 KB receive buffer does not live on the stack.
    static char szBuffer[1024*100];
    while (1)
    {
        // Wait to receive, nRet = NumberOfBytesReceived
        nRet = recv(Socket, szBuffer, sizeof(szBuffer), 0);
        if (nRet == SOCKET_ERROR)
        {
            PRINTERROR("recv()");
            break;
        }

        // Did the server close the connection?
        if (nRet == 0)
        {
            break;
        }

        // recv() does not NUL-terminate and may fill the entire buffer, so
        // append exactly the number of bytes it reported.
        text.append(szBuffer, nRet);
    }

    closesocket(Socket);
}
SeriousMoin v1 (koMoinMoin 1.0a4 Modified)