This repository has been archived on 2021-11-25. You can view files and clone it, but cannot push or open issues/pull-requests.
BigDataProject/2.4.cpp

135 lines
4.3 KiB
C++

#include "MySQLWrapper.h"
#include <iostream>
#include <sstream>
#include <vector>
#include <map>
#include <set>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <fstream>
using namespace std;
#include "local_db_account.h"
#include "util.hpp"
/// Folds every city whose name contains an earlier (in map order) city name
/// into that earlier entry, e.g. a longer spelling that embeds a shorter one.
/// For each entry the first matching earlier key wins; the absorbed entry is
/// erased. An empty key is never used as a merge target, because every string
/// "contains" the empty string and it would otherwise swallow the whole map.
static void mergeContainedCityNames(std::map<std::string,int>& counts)
{
    for(auto cur=counts.begin();cur!=counts.end(); )
    {
        auto target=counts.end();
        for(auto prev=counts.begin();prev!=cur;++prev)
        {
            if(!prev->first.empty() && cur->first.find(prev->first)!=std::string::npos)
            {
                target=prev;
                break;
            }
        }
        if(target!=counts.end())
        {
            target->second+=cur->second;
            /// map::erase returns the iterator following the removed element.
            cur=counts.erase(cur);
        }
        else
        {
            ++cur;
        }
    }
}

/// Returns the (city, count) pairs ordered by count, largest first.
/// stable_sort keeps equal counts in the map's key order, so the output
/// is reproducible from run to run.
static std::vector<std::pair<std::string,int>> rankByCountDesc(const std::map<std::string,int>& counts)
{
    std::vector<std::pair<std::string,int>> ranked(counts.begin(),counts.end());
    std::stable_sort(ranked.begin(),ranked.end(),
                     [](const std::pair<std::string,int>& a,const std::pair<std::string,int>& b)
    {
        return a.second > b.second;
    });
    return ranked;
}

/// 2.4 Distribution of companies across cities.
/// Runs the same per-city aggregation against three databases (bigdata3,
/// bigdata5, bigdata2), sums the per-city counts, merges city names that
/// contain another city name, and writes the totals to result/2.4.csv in
/// descending order of count.
/// Returns 0 on success, 1 if the output file cannot be opened.
int main()
{
    MySQLConn conn;
    /// NOTE(review): the outcome of connect() is not inspected here, so the
    /// message below is printed unconditionally — confirm how MySQLConn reports failure.
    conn.connect("127.0.0.1",db_user,db_passwd,"bigdata3",3306);
    std::cout<<"Connected to DB"<<std::endl;
    conn.exec("set names gbk",nullptr);

    std::map<std::string,int> cityCounts;

    /// Shared row handler: column 0 is the city name, column 1 the count.
    auto collectRows=[&cityCounts](MySQLResult& res)
    {
        res.stepRow([&cityCounts](char** val,unsigned long*)
        {
            /// Without a count there is nothing to add.
            if(val[1]==nullptr) return;
            /// A null city pointer must not reach the std::string constructor;
            /// such rows are counted under the empty name instead.
            const std::string city(val[0]!=nullptr ? val[0] : "");
            cityCounts[city]+=ParseInt(val[1]);
        });
    };

    /// bigdata5 and bigdata2 share the same table layout, hence the same query.
    const char* const pagesQuery=
        "select job_city,count(*) from (select job_city from pages group by company_name) as B group by job_city";

    conn.exec("select job_city,count(*) from (select job_city from lagou_job group by company_name) as A group by job_city",
              collectRows);
    conn.exec("use bigdata5",nullptr);
    conn.exec(pagesQuery,collectRows);
    conn.exec("use bigdata2",nullptr);
    conn.exec(pagesQuery,collectRows);

    mergeContainedCityNames(cityCounts);
    const std::vector<std::pair<std::string,int>> ranked=rankByCountDesc(cityCounts);

    std::ofstream ofs("result/2.4.csv");
    if(!ofs)
    {
        std::cerr<<"Failed to open result/2.4.csv for writing"<<std::endl;
        return 1;
    }
    ofs<<"城市,公司数量"<<'\n';
    for(const auto& entry:ranked)
    {
        ofs<<entry.first<<","<<entry.second<<'\n';
    }
    return 0;
}