Compare commits

...

No commits in common. "master" and "0.9" have entirely different histories.
master ... 0.9

9 changed files with 322 additions and 59 deletions

View File

@ -1,10 +1,14 @@
/*
@2017-12-06,add by jidzh,for
*/
#include "AuxFun.h"
#include<Windows.h>
#include <string>
#include <assert.h>
#include "zlib/zconf.h"
#include "zlib/zlib.h"
#include "zconf.h"
#include "zlib.h"
std::string MBCStoUTF8(const char* mbcsStr)
{
wchar_t* wideStr;
@ -40,7 +44,6 @@ std::string UTF8ToMultiByte(const char* mbcsStr)
std::string strRet = strAnsi;
free(wideStr);
free(strAnsi);
return strRet;
}
@ -105,21 +108,23 @@ std::string UrlDecode(const std::string& str)
}
return strTemp;
}
int inflate_read(char *source, int len, char **dest, int gzip, int& iTotalRead)
{
int ret;
unsigned have;
z_stream strm;
unsigned char out[10000];
unsigned char out[10000] = {0};
int totalsize = 0;
/* allocate inflate state */
strcpy((char*)out, "");
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
strm.opaque = Z_NULL;
strm.avail_in = 0;
strm.next_in = Z_NULL;
strm.avail_in = len;
strm.next_in = (Bytef*)source;
if (gzip)
ret = inflateInit2(&strm, 47);
else
@ -138,7 +143,7 @@ int inflate_read(char *source, int len, char **dest, int gzip, int& iTotalR
ret = inflate(&strm, Z_NO_FLUSH);
//assert(ret != Z_STREAM_ERROR); /* state not clobbered */
switch (ret) {
switch (ret) {
case Z_NEED_DICT:
ret = Z_DATA_ERROR; /* and fall through */
case Z_DATA_ERROR:
@ -158,4 +163,56 @@ int inflate_read(char *source, int len, char **dest, int gzip, int& iTotalR
(void)inflateEnd(&strm);
return ret == Z_STREAM_END ? Z_OK : Z_DATA_ERROR;
}
}
#define segment_size 1460//largest tcp data segment 1460
int ungzip(char* source, int len, char*des)
{
int ret, have;
int offset = 0;
z_stream d_stream;
Byte compr[segment_size] = { 0 }, uncompr[segment_size * 4] = { 0 };
memcpy(compr, (Byte*)source, len);
uLong comprLen, uncomprLen;
comprLen = len;//一开始写成了comprlen=sizeof(compr)以及comprlen=strlen(compr),后来发现都不对。
//sizeof(compr)永远都是segment_size显然不对strlen(compr)也是不对的因为strlen只算到\0之前
//但是gzip或者zlib数据里\0很多。
uncomprLen = segment_size * 4;
strcpy((char*)uncompr, "garbage");
d_stream.zalloc = Z_NULL;
d_stream.zfree = Z_NULL;
d_stream.opaque = Z_NULL;
d_stream.next_in = Z_NULL;//inflateInit和inflateInit2都必须初始化next_in和avail_in
d_stream.avail_in = 0;//deflateInit和deflateInit2则不用
ret = inflateInit2(&d_stream, 47);
if (ret != Z_OK)
{
printf("inflateInit2 error:%d", ret);
return ret;
}
d_stream.next_in = compr;
d_stream.avail_in = comprLen;
do
{
d_stream.next_out = uncompr;
d_stream.avail_out = uncomprLen;
ret = inflate(&d_stream, Z_NO_FLUSH);
assert(ret != Z_STREAM_ERROR);
switch (ret)
{
case Z_NEED_DICT:
ret = Z_DATA_ERROR;
case Z_DATA_ERROR:
case Z_MEM_ERROR:
(void)inflateEnd(&d_stream);
return ret;
}
have = uncomprLen - d_stream.avail_out;
memcpy(des + offset, uncompr, have);//这里一开始我写成了memcpy(des+offset,d_stream.next_out,have);
//后来发现这是不对的因为next_out指向的下次的输出现在指向的是无有意义数据的内存。见下图
offset += have;
} while (d_stream.avail_out == 0);
inflateEnd(&d_stream);
memcpy(des + offset, "\0", 1);
return ret;
}

View File

@ -5,5 +5,5 @@ std::string MBCStoUTF8(const char* mbcsStr);
std::string UTF8ToMultiByte(const char* mbcsStr);
std::string UrlEncode(const std::string& str);
int fun(char *source, int len, char **dest1, int gzip, int& iTotalRead);
int inflate_read(char *source, int len, char **dest, int gzip, int& iTotalRead);

12
CWeatherDataGet.cpp Normal file
View File

@ -0,0 +1,12 @@
#include "CWeatherDataGet.h"
// Default constructor: performs no work.
CWeatherDataGet::CWeatherDataGet() {}
// Destructor: performs no work.
CWeatherDataGet::~CWeatherDataGet() {}

15
CWeatherDataGet.h Normal file
View File

@ -0,0 +1,15 @@
//------------------------------------
//author: jidzh
//date: 2017-10-30
//------------------------------------
#pragma once
// Declares only a default constructor and a destructor; the class has no
// data members or other methods yet.
class CWeatherDataGet
{
public:
    CWeatherDataGet();
    ~CWeatherDataGet();
};

View File

@ -1,6 +1,7 @@
#include "CWeatherDataSpider.h"
#include"AuxFun.h"
#include"parseXML.h"
#define _WINSOCK_DEPRECATED_NO_WARNINGS
//#include<WinSock2.h>
@ -12,7 +13,7 @@
#define WEATHER_FORECAST 1
// Sets the default area used for the weather query (Tianjin). The value is
// assigned once; the repeated identical assignment was redundant.
CWeatherDataSpider::CWeatherDataSpider()
{
    m_strAreaCode = "天津";
}
@ -20,18 +21,23 @@ CWeatherDataSpider::~CWeatherDataSpider()
{
}
bool CWeatherDataSpider::GetWeatherDataXml()
// Starts Winsock, requesting version 2.2.
// Returns true when WSAStartup reports success (0). On failure the error
// code is formatted into a message, passed to ErrHandle(), and false is
// returned.
bool CWeatherDataSpider::CreateWSAdata()
{
    WSADATA wsadata;
    // Call WSAStartup exactly once and compare explicitly, rather than
    // assigning inside the condition.
    const int ierror = WSAStartup(MAKEWORD(2, 2), &wsadata);
    if (ierror != 0)
    {
        wchar_t tchTmp[100];
        swprintf(tchTmp, 100, L"WSAStartup failure: %d\n", ierror);
        ErrHandle(tchTmp);
        return false;
    }
    return true;
}
bool CWeatherDataSpider::GetWeatherDataXml()
{
//------------------创建描述符----------------------
if (!CreateWSAdata()){ WSACleanup(); return false; }
//----------------------------------------------------
m_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if (INVALID_SOCKET == m_socket)
@ -49,12 +55,12 @@ bool CWeatherDataSpider::GetWeatherDataXml()
//}
//if(connect(sock, (SOCKADDR*)&sa, sizeof(sa)))
//连接主机
//连接主机
if (!ConnectHost()) { goto bad; }
//发送请求
//发送请求
if (!SendReq()) { goto bad; }
//接收回应
//接收回应
if (!HandleAck()) { goto bad; }
else{ CloseConnect(); return true; }
@ -86,51 +92,67 @@ bool CWeatherDataSpider::CheckIfRecSucceed(char * buf, int nTotalRecv, int nFlag
return false;
}
//解析回复是否是200
//截取xml
//解析回复是否是200
//截取xml
//
std::string strHeadFind =buf;
int n =strHeadFind.find("\r\n");
if (-1 != (n = strHeadFind.rfind("200", n))) { return true; }
//int n =strHeadFind.find("\r\n");
//if (-1 != (n = strHeadFind.rfind("200", n))) { return true; }
PosStart = strHeadFind.find("content-length: ");//该标签 加空格16个字符
PosStart =PosStart + 16;
PosEnd = strHeadFind.find("\r\n", PosStart);
std::string strLen =strHeadFind.substr(PosStart, PosEnd-PosStart);
int nlen = atoi(strLen.c_str());
PosStart = strHeadFind.find("\r\n\r\n")+4;
char* Output = NULL;//= new char[10000];
//获取内容长度
int iTotalRead = 0;
inflate_read((char*)buf + PosStart, nlen, &Output, 1, iTotalRead);
char* Output = NULL;//= new char[10000];
if (-1 != (PosStart = strHeadFind.find("Content-Length: ")))//该标签 加空格16个字符
{
PosStart = PosStart + 16;
PosEnd = strHeadFind.find("\r\n", PosStart);
std::string strLen = strHeadFind.substr(PosStart, PosEnd - PosStart);
int nlen = atoi(strLen.c_str());//转换
//寻找起始点
PosStart = strHeadFind.find("\r\n\r\n") + 4;
if (PosStart + nlen > nTotalRecv)
{
printf("err\n");
}
//char* Output = NULL;//= new char[10000];
//fun((char*)buf + PosStart, nlen, &Output, 1, iTotalRead);
inflate_read((char*)buf + PosStart, nlen, &Output, 1, iTotalRead);
}
else
{
PosStart = strHeadFind.find("\r\n\r\n") + 4;
std::string strlen = strHeadFind.substr(PosStart);
PosEnd = strlen.find("\r\n",0);
//char tmp[8] = {0};
// strncpy(tmp, buf+ PosStart,4);
int nlen = strtol(strlen.substr(0,PosEnd).c_str(), 0, 16);//转换
//TiXmlDocument xmlDoc;
//xmlDoc.Parse(m_pbyXmlBuf);
// int iTotalRead = 0;
inflate_read((char*)buf + PosStart+2, nlen, &Output, 1, iTotalRead);
//char *s = Output;
//if (xmlDoc.Error())
//{
// return false;
//}
//xmlDoc.SaveFile("D:\\demotest.xml");//for debug only
//if (nFlag == WEATHER_REALTIME)
//{
// m_xmlDocRealTime = xmlDoc;
//}
//else
//{
// m_xmlDocForecast = xmlDoc;
//}
}
FILE *fp = NULL;
fp = fopen("test.xml", "wb+");
int num = fwrite(Output, sizeof(char), iTotalRead, fp);
//fclose(fp);//如果没有这个会发生什么?
char txt[10000] = {0};
memcpy(txt, Output, iTotalRead);
// ReadFileAndTraversal();
XMLparse(txt);
std::string a = UTF8ToMultiByte(Output);
printf("%s", a.c_str());
printf("%d", iTotalRead>a.length()?1:0);
return true;
}
bool CWeatherDataSpider::SendReq()
{
//将 http://wthrcdn.etouch.cn/WeatherApi?city=天津 中文utf编码
//转换为 http://wthrcdn.etouch.cn/WeatherApi?city=%E5%A4%A9%E6%B4%A5
//将 http://wthrcdn.etouch.cn/WeatherApi?city=天津 中文utf编码
//转换为 http://wthrcdn.etouch.cn/WeatherApi?city=%E5%A4%A9%E6%B4%A5
std::string SendTmp = MBCStoUTF8(m_strAreaCode.c_str());
std::string SendTmp2 = UrlEncode(SendTmp);
std::string strSnd = "GET /WeatherApi?city=" + SendTmp2;
@ -146,7 +168,9 @@ bool CWeatherDataSpider::SendReq()
}
return true;
}
//突然懒了,一次接收算了,整这么多幺蛾子木用
//突然懒了,一次接收算了,整这么多幺蛾子木用
//@2017-12-05,之后还是要写成多次接收比较好,针对各种情况一步到位
bool CWeatherDataSpider::HandleAck()
{
char* pRecvBuff = new char[RECV_BUFF_LENGTH];//10kBbuff
@ -210,7 +234,7 @@ bool CWeatherDataSpider::ConnectHost()
return false;
}
//成功返回0
//成功返回0
int nRet = connect(m_socket, (SOCKADDR*)&sa, sizeof(sa));
if (!nRet)

View File

@ -18,21 +18,36 @@ class CWeatherDataSpider
public:
CWeatherDataSpider();
~CWeatherDataSpider();
public:
bool GetWeatherDataXml();
void CloseConnect();
void ErrHandle(const std::wstring msg, int code =0);
public:
std::string m_strAreaCode;
private:
//------------------------------------------------
SOCKET m_socket;
//WSADATA m_WSAdata;
bool CheckIfRecSucceed(char * buf, int nTotalRecv, int nFlag);
private:
//----------------------------------------------------
//创建描述符
bool CreateWSAdata();
//连接主机
bool ConnectHost();
//发送请求
bool SendReq();
//处理回应
bool HandleAck();
//检查是否接收成功
bool CheckIfRecSucceed(char * buf, int nTotalRecv, int nFlag);
public:
bool GetWeatherDataXml();
void CloseConnect();
void ErrHandle(const std::wstring msg, int code = 0);
};

120
parseXML.cpp Normal file
View File

@ -0,0 +1,120 @@
#include "parseXML.h"
#include <iostream>
#include "rapidxml.hpp"
#include "rapidxml_utils.hpp"
#include "rapidxml_print.hpp"
using namespace rapidxml;
// Default constructor: performs no work.
parseXML::parseXML() {}
// Destructor: performs no work.
parseXML::~parseXML() {}
// Reads the whole file `fileName` in binary mode and returns its bytes.
// Returns an empty string when `fileName` is null or the file cannot be
// opened. As a side effect the process locale is set from the environment
// via setlocale(LC_ALL, "").
std::string ReadFile(const char* fileName)
{
    setlocale(LC_ALL, "");

    // fopen() with a null path is undefined behaviour; treat it like a
    // file that cannot be opened.
    if (NULL == fileName)
    {
        return std::string();
    }

    FILE *fp = fopen(fileName, "rb");
    if (NULL == fp)
    {
        return std::string();
    }

    std::string strBuffer;
    char buf[1024];
    size_t byteRead = 0;
    // fread returns 0 at end of file or on error; either ends the loop.
    while ((byteRead = fread(buf, 1, sizeof(buf), fp)) > 0)
    {
        strBuffer.append(buf, byteRead);
    }
    fclose(fp);
    return strBuffer;
}
//
int ReadFileAndTraversal()
{
//读取文件内容
std::string szFileBuf = ReadFile("test.xml");
//使用rapidxml::file读取文件更方便
rapidxml::file<> fdoc("test.xml");
//打印读取的内容
std::cout << fdoc.data() << std::endl;
rapidxml::xml_document<> doc;
doc.parse<0>(fdoc.data());
//doc.parse<0>(szFileBuf.c_str());
//打印整个XML内容
std::cout << doc.name() << std::endl;
//在XML文档中寻找第一个节点
const rapidxml::xml_node<> *ellipsoid = doc.first_node("EllipsoidParams");
if (NULL == ellipsoid)
{
return -1;
}
//遍历所有椭球信息
for (rapidxml::xml_node<> *datum = ellipsoid->first_node("Datum");
NULL != datum;
datum = datum->next_sibling())
{
//遍历单个椭球的所有属性
std::string szTmp("");
for (rapidxml::xml_attribute<char> * attr = datum->first_attribute("Name");
attr != NULL;
attr = attr->next_attribute())
{
szTmp.append(attr->name());//name() value()返回的字符串不会去掉首尾的空白字符
szTmp.append(": ");
szTmp.append(attr->value());
szTmp.append(", ");
}
std::cout << szTmp.c_str() << std::endl;
}
return 0;
}
bool XMLparse(char * data)
{
rapidxml::xml_document<> doc;
doc.parse<0>(data);
std::cout << doc.name() << std::endl;
//在XML文档中寻找第一个节点
const rapidxml::xml_node<> *ellipsoid = doc.first_node("resp");
if (NULL == ellipsoid)
{
return -1;
}
//遍历所有椭球信息
for (rapidxml::xml_node<> *datum = ellipsoid->first_node("weather");
NULL != datum;
datum = datum->next_sibling())
{
//遍历单个椭球的所有属性
std::string szTmp("");
for (rapidxml::xml_attribute<char> * attr = datum->first_attribute("Name");
attr != NULL;
attr = attr->next_attribute())
{
szTmp.append(attr->name());//name() value()返回的字符串不会去掉首尾的空白字符
szTmp.append(": ");
szTmp.append(attr->value());
szTmp.append(", ");
}
std::cout << szTmp.c_str() << std::endl;
}
return true;
}

11
parseXML.h Normal file
View File

@ -0,0 +1,11 @@
#pragma once
#include <tchar.h>
// Declares only a default constructor and a destructor; the class has no
// data members or other methods.
class parseXML
{
public:
    parseXML();
    ~parseXML();
};
bool XMLparse(char * data);
int ReadFileAndTraversal();

9
testMain.cpp Normal file
View File

@ -0,0 +1,9 @@
#include "CWeatherDataSpider.h"
int main()
{
CWeatherDataSpider a;
a.GetWeatherDataXml();
return 0;
}