This repository has been archived on 2021-11-25. You can view files and clone it, but cannot push or open issues/pull-requests.
BigDataProject/3.2.cpp

268 lines
8.5 KiB
C++

#include "MySQLWrapper.h"
#include <iostream>
#include <sstream>
#include <vector>
#include <map>
#include <set>
#include <cstdlib>
#include <cstdio>
#include <cstring>
#include <algorithm>
#include <fstream>
using namespace std;
#include "local_db_account.h"
#include "util.hpp"
int main()
{
    /// 3.2: Top 10 cities by demand for big-data positions, cross-tabulated
    /// against the top 6 big-data job titles by demand.
    ///
    /// Steps:
    ///   1. Collect per-city posting counts from bigdata3.lagou_job and bigdata5.pages.
    ///   2. Collect per-title posting counts from bigdata3.lagou_job, bigdata5.pages
    ///      and bigdata2.pages.
    ///   3. Fold names that contain an earlier (map-ordered) name into that name
    ///      and sum the counts.
    ///   4. Keep the 10 largest cities and the 6 largest titles.
    ///   5. Count postings for every (city, title) pair across the three databases.
    ///   6. Write the matrix to result/3.2.csv.
    ///
    /// Returns 0 on success, 1 if the output file cannot be opened.

    /// name -> list of counts (one entry per source query / merged name).
    using CountMap=map<string,vector<int>>;
    /// Entries of a CountMap ordered by descending total.
    using RankedList=vector<pair<string,vector<int>>>;

    MySQLConn conn;
    conn.connect("127.0.0.1",db_user,db_passwd,"bigdata3",3306);
    cout<<"Connected to DB"<<endl;
    conn.exec("set names gbk",nullptr);

    /// Runs a "name,count" query and appends each count under its trimmed name.
    /// Rows whose columns are null pointers are skipped: constructing a
    /// std::string from a null char* is undefined behaviour.
    /// NOTE(review): assumes the wrapper hands SQL NULL through as a null
    /// pointer, as the MySQL C API does -- confirm against MySQLWrapper.h.
    auto collectCounts=[&](const string& sql,CountMap& target)
    {
        conn.exec(sql,[&](MySQLResult& res)
        {
            res.stepRow([&](char** val,unsigned long*)
            {
                if(!val[0] || !val[1]) return;
                string name(val[0]);
                int cc=ParseInt(val[1]);
                target[Trim(name)].push_back(cc);
            });
        });
    };

    /// Runs a "select count(*)" style query and returns the first column of
    /// the first row delivered to the callback, or 0 if nothing was delivered.
    auto fetchCount=[&](const string& sql)->int
    {
        int count=0;
        conn.exec(sql,[&](MySQLResult& res)
        {
            bool seen=false;
            res.stepRow([&](char** val,unsigned long*)
            {
                if(seen) return;
                if(val[0]) count=ParseInt(val[0]);
                seen=true;
            });
        });
        return count;
    };

    /// Deduplicate + sum.
    /// A key that contains an earlier key (in map order) as a substring is
    /// folded into that earlier key; afterwards element [0] of every vector
    /// holds the sum of all counts gathered under that key.
    auto fn1=[](CountMap& groups)
    {
        for(auto cur=groups.begin();cur!=groups.end(); )
        {
            auto host=groups.begin();
            for(;host!=cur;++host)
            {
                /// An empty key is a substring of everything; never let it
                /// swallow every other entry.
                if(host->first.empty()) continue;
                if(cur->first.find(host->first)!=string::npos) break;
            }
            if(host==cur)
            {
                /// No earlier key is contained in this one; keep it.
                ++cur;
                continue;
            }
            /// Move this entry's counts onto the earlier key, then drop it.
            host->second.insert(host->second.end(),
                                cur->second.begin(),cur->second.end());
            cur=groups.erase(cur);
        }
        for(auto& entry:groups)
        {
            int total=0;
            for(int v:entry.second) total+=v;
            /// Every vector has at least one element: keys are only created
            /// by a push_back in collectCounts.
            entry.second[0]=total;
        }
    };

    /// Returns up to `limit` entries with the largest totals (element [0]).
    /// Fewer entries are returned when the map is smaller than `limit`.
    auto topEntries=[](const CountMap& groups,std::size_t limit)->RankedList
    {
        RankedList ranked(groups.begin(),groups.end());
        sort(ranked.begin(),ranked.end(),[](const pair<string,vector<int>>& a,const pair<string,vector<int>>& b)
        {
            return a.second[0] > b.second[0] ;
        });
        if(ranked.size()>limit) ranked.resize(limit);
        return ranked;
    };

    /// city -> counts. The connection starts on bigdata3.
    CountMap mp;
    collectCounts("select job_city,count(*) as want_num from lagou_job where ((title like '%大数据%' ) or ( tags like '%大数据%')) group by job_city order by want_num desc",mp);
    conn.exec("use bigdata5",nullptr);
    collectCounts("select job_city,count(*) as want_num from pages where ((title like '%大数据%' ) or ( tags like '%大数据%')) group by job_city order by want_num desc",mp);
    fn1(mp);

    /// title -> counts.
    CountMap dmp;
    conn.exec("use bigdata3",nullptr);
    collectCounts("select title,count(*) as want_num from lagou_job where ((title like '%大数据%' ) or ( tags like '%大数据%')) group by title order by want_num desc",dmp);
    conn.exec("use bigdata5",nullptr);
    collectCounts("select title,count(*) as want_num from pages where ((title like '%大数据%' ) or ( tags like '%大数据%')) group by title order by want_num desc",dmp);
    conn.exec("use bigdata2",nullptr);
    collectCounts("select title,count(*) as want_num from pages where title like '%大数据%' group by title order by want_num desc",dmp);
    fn1(dmp);

    const RankedList city_vec=topEntries(mp,10);
    const RankedList job_vec=topEntries(dmp,6);

    int szCity=static_cast<int>(city_vec.size());
    int szJob=static_cast<int>(job_vec.size());
    cout<<"szCity: "<<szCity<<" szJob: "<<szJob<<endl;

    /// counts[i][j]: postings for city_vec[i] and job_vec[j] over all databases.
    vector<vector<int>> counts(szCity,vector<int>(szJob,0));
    for(int i=0;i<szCity;i++)
    {
        for(int j=0;j<szJob;j++)
        {
            cout<<"i:"<<i<<"("<<city_vec[i].first<<") j:"<<j<<"("<<job_vec[j].first<<") "<<endl;

            /// NOTE(review): names are spliced into the SQL text unescaped.
            /// They come from the database itself, but a name containing a
            /// quote, '%' or '_' will break or widen the LIKE pattern. Use
            /// the wrapper's escaping / prepared statements if it has any.
            const string filter=" where ( (job_city like '%"+city_vec[i].first
                               +"%' ) and (title like '%"+job_vec[j].first+"%'))";
            const string lagouQuery="select count(*) from lagou_job"+filter;
            const string pagesQuery="select count(*) from pages"+filter;

            conn.exec("use bigdata3",nullptr);
            int c1=fetchCount(lagouQuery);
            conn.exec("use bigdata5",nullptr);
            int c2=fetchCount(pagesQuery);
            /// bigdata2 is queried with the same `pages` statement.
            conn.exec("use bigdata2",nullptr);
            int c3=fetchCount(pagesQuery);

            counts[i][j]=c1+c2+c3;
        }
    }

    /// CSV layout: header row of job titles, then one row per city.
    ofstream ofs("result/3.2.csv");
    if(!ofs)
    {
        cerr<<"Failed to open result/3.2.csv for writing"<<endl;
        return 1;
    }
    ofs<<" ";
    for(int j=0;j<szJob;++j)
    {
        ofs<<","<<job_vec[j].first;
    }
    ofs<<'\n';
    for(int i=0;i<szCity;++i)
    {
        ofs<<city_vec[i].first;
        for(int j=0;j<szJob;j++)
        {
            ofs<<","<<counts[i][j];
        }
        ofs<<'\n';
    }
    return 0;
}