mirror of
https://github.com/awesomedata/awesome-public-datasets.git
synced 2024-04-18 07:30:58 +08:00
Merge branch 'master' of https://github.com/caesar0301/awesome-public-datasets
This commit is contained in:
commit
685e5e4e39
20
.travis.yml
20
.travis.yml
|
@ -1,10 +1,10 @@
|
||||||
language: ruby
|
# language: ruby
|
||||||
rvm:
|
# rvm:
|
||||||
- 2.2
|
# - 2.2
|
||||||
before_script:
|
# before_script:
|
||||||
- gem install awesome_bot
|
# - gem install awesome_bot
|
||||||
script:
|
# script:
|
||||||
- site404=www.datawrangling.com,getglue-data.s3.amazonaws.com,archive.org/details/2011-05-calufa-twitter-sql,www.stats4stem.org,lib.stat.cmu.edu,http://www.oecd.org/document/0,census.gov/acs/www/data_documentation/data_release_info/
|
# - site404=www.datawrangling.com,getglue-data.s3.amazonaws.com,archive.org/details/2011-05-calufa-twitter-sql,www.stats4stem.org,lib.stat.cmu.edu,http://www.oecd.org/document/0,census.gov/acs/www/data_documentation/data_release_info/
|
||||||
- whtlist=travis,crawdad.cs.dartmouth.edu,data.nasdaq.com,137.189.35.203/WebUI/CatDatabase/catData.html,numbrary.com,www.cmr.osu.edu,gutenberg.org,donnees.gouv.qc.ca,data.rio.rj.gov.br,ntrl.ntis.gov,openflights.org,www.data.gov.bc.ca,earthdata.nasa,pgp-hms,cru.uea.ac.uk,networkdata.ics,datos.argentina,data.gov.ie,isi.edu,data.go.id,wiki.dbpedia,www.laval.ca,www.wunderground.com,data.lexingtonky.gov,arcgis,bixi
|
# - whtlist=travis,crawdad.cs.dartmouth.edu,data.nasdaq.com,137.189.35.203/WebUI/CatDatabase/catData.html,numbrary.com,www.cmr.osu.edu,gutenberg.org,donnees.gouv.qc.ca,data.rio.rj.gov.br,ntrl.ntis.gov,openflights.org,www.data.gov.bc.ca,earthdata.nasa,pgp-hms,cru.uea.ac.uk,networkdata.ics,datos.argentina,data.gov.ie,isi.edu,data.go.id,wiki.dbpedia,www.laval.ca,www.wunderground.com,data.lexingtonky.gov,arcgis,bixi
|
||||||
- site503=datamob.org,research.microsoft.com
|
# - site503=datamob.org,research.microsoft.com
|
||||||
- awesome_bot README.rst --allow-dupe --allow-redirect --set-timeout 5 --allow-timeout --white-list $site404,$whtlist,$site503
|
# - awesome_bot README.rst --allow-dupe --allow-redirect --set-timeout 5 --allow-timeout --white-list $site404,$whtlist,$site503
|
||||||
|
|
|
@ -85,6 +85,7 @@ Government
|
||||||
* `Texas Open Data <https://data.texas.gov/>`_
|
* `Texas Open Data <https://data.texas.gov/>`_
|
||||||
* `The World Bank <http://wdronline.worldbank.org/>`_
|
* `The World Bank <http://wdronline.worldbank.org/>`_
|
||||||
* `Toronto, ON, Canada <http://www1.toronto.ca/wps/portal/contentonly?vgnextoid=1a66e03bb8d1e310VgnVCM10000071d60f89RCRD>`_
|
* `Toronto, ON, Canada <http://www1.toronto.ca/wps/portal/contentonly?vgnextoid=1a66e03bb8d1e310VgnVCM10000071d60f89RCRD>`_
|
||||||
|
* `Tunisia <http://www.data.gov.tn/>`_
|
||||||
* `U.K. Government Data <http://data.gov.uk/data>`_
|
* `U.K. Government Data <http://data.gov.uk/data>`_
|
||||||
* `U.S. American Community Survey <http://www.census.gov/acs/www/data_documentation/data_release_info/>`_
|
* `U.S. American Community Survey <http://www.census.gov/acs/www/data_documentation/data_release_info/>`_
|
||||||
* `U.S. CDC Public Health datasets <http://www.cdc.gov/nchs/data_access/ftp_data.htm>`_
|
* `U.S. CDC Public Health datasets <http://www.cdc.gov/nchs/data_access/ftp_data.htm>`_
|
||||||
|
|
95
README.rst
95
README.rst
|
@ -3,8 +3,6 @@ Awesome Public Datasets
|
||||||
.. image:: https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg
|
.. image:: https://cdn.rawgit.com/sindresorhus/awesome/d7305f38d29fed78fa85652e3a63e154dd8e8829/media/badge.svg
|
||||||
:alt: Awesome
|
:alt: Awesome
|
||||||
:target: https://github.com/sindresorhus/awesome
|
:target: https://github.com/sindresorhus/awesome
|
||||||
.. image:: https://travis-ci.org/caesar0301/awesome-public-datasets.svg
|
|
||||||
:target: https://travis-ci.org/caesar0301/awesome-public-datasets
|
|
||||||
|
|
||||||
`This list of public data sources <https://github.com/caesar0301/awesome-public-datasets>`_
|
`This list of public data sources <https://github.com/caesar0301/awesome-public-datasets>`_
|
||||||
are collected and tidied from blogs, answers, and user responses.
|
are collected and tidied from blogs, answers, and user responses.
|
||||||
|
@ -29,7 +27,6 @@ Biology
|
||||||
* `Broad Cancer Cell Line Encyclopedia (CCLE) <http://www.broadinstitute.org/ccle/home>`_
|
* `Broad Cancer Cell Line Encyclopedia (CCLE) <http://www.broadinstitute.org/ccle/home>`_
|
||||||
* `Broad Bioimage Benchmark Collection (BBBC) <https://www.broadinstitute.org/bbbc>`_
|
* `Broad Bioimage Benchmark Collection (BBBC) <https://www.broadinstitute.org/bbbc>`_
|
||||||
* `Cell Image Library <http://www.cellimagelibrary.org>`_
|
* `Cell Image Library <http://www.cellimagelibrary.org>`_
|
||||||
* `Collaborative Research in Computational Neuroscience (CRCNS) <http://crcns.org/data-sets>`_
|
|
||||||
* `Complete Genomics Public Data <http://www.completegenomics.com/public-data/69-genomes/>`_
|
* `Complete Genomics Public Data <http://www.completegenomics.com/public-data/69-genomes/>`_
|
||||||
* `EBI ArrayExpress <http://www.ebi.ac.uk/arrayexpress/>`_
|
* `EBI ArrayExpress <http://www.ebi.ac.uk/arrayexpress/>`_
|
||||||
* `EBI Protein Data Bank in Europe <http://www.ebi.ac.uk/pdbe/emdb/index.html/>`_
|
* `EBI Protein Data Bank in Europe <http://www.ebi.ac.uk/pdbe/emdb/index.html/>`_
|
||||||
|
@ -48,8 +45,7 @@ Biology
|
||||||
* `MIT Cancer Genomics Data <http://www.broadinstitute.org/cgi-bin/cancer/datasets.cgi>`_
|
* `MIT Cancer Genomics Data <http://www.broadinstitute.org/cgi-bin/cancer/datasets.cgi>`_
|
||||||
* `NCBI Proteins <http://www.ncbi.nlm.nih.gov/guide/proteins/#databases>`_
|
* `NCBI Proteins <http://www.ncbi.nlm.nih.gov/guide/proteins/#databases>`_
|
||||||
* `NCBI Taxonomy <http://www.ncbi.nlm.nih.gov/taxonomy>`_
|
* `NCBI Taxonomy <http://www.ncbi.nlm.nih.gov/taxonomy>`_
|
||||||
* `NeuroData <http://neurodata.io>`_
|
* `NIH Microarray data <http://bit.do/VVW6>`_ or `FTP <ftp://ftp.ncbi.nih.gov/pub/geo/DATA/supplementary/series/GSE6532/>`_ (see FTP link on `RAW <https://raw.githubusercontent.com/caesar0301/awesome-public-datasets/master/README.rst>`_)
|
||||||
* `NIH Microarray data <http://bit.do/VVW6>`_ or `FTP <ftp://ftp.ncbi.nih.gov/pub/geo/DATA/supplementary/series/GSE6532/>`_
|
|
||||||
* `OpenSNP genotypes data <https://opensnp.org/>`_
|
* `OpenSNP genotypes data <https://opensnp.org/>`_
|
||||||
* `Pathguide - Protein-Protein Interactions Catalog <http://www.pathguide.org/>`_
|
* `Pathguide - Protein-Protein Interactions Catalog <http://www.pathguide.org/>`_
|
||||||
* `Protein Data Bank <http://www.rcsb.org/>`_
|
* `Protein Data Bank <http://www.rcsb.org/>`_
|
||||||
|
@ -62,7 +58,6 @@ Biology
|
||||||
* `Stanford Microarray Data <http://smd.stanford.edu/>`_
|
* `Stanford Microarray Data <http://smd.stanford.edu/>`_
|
||||||
* `Stowers Institute Original Data Repository <http://www.stowers.org/research/publications/odr>`_
|
* `Stowers Institute Original Data Repository <http://www.stowers.org/research/publications/odr>`_
|
||||||
* `Systems Science of Biological Dynamics (SSBD) Database <http://ssbd.qbic.riken.jp>`_
|
* `Systems Science of Biological Dynamics (SSBD) Database <http://ssbd.qbic.riken.jp>`_
|
||||||
* `Temple University Hospital EEG Database <https://www.nedcdata.org/drupal/node/12>`_
|
|
||||||
* `The Cancer Genome Atlas (TCGA), available via Broad GDAC <https://gdac.broadinstitute.org/>`_
|
* `The Cancer Genome Atlas (TCGA), available via Broad GDAC <https://gdac.broadinstitute.org/>`_
|
||||||
* `The Catalogue of Life <http://www.catalogueoflife.org/content/annual-checklist-archive>`_
|
* `The Catalogue of Life <http://www.catalogueoflife.org/content/annual-checklist-archive>`_
|
||||||
* `The Personal Genome Project <http://www.personalgenomes.org/>`_ or `PGP <https://my.pgp-hms.org/public_genetic_data>`_
|
* `The Personal Genome Project <http://www.personalgenomes.org/>`_ or `PGP <https://my.pgp-hms.org/public_genetic_data>`_
|
||||||
|
@ -75,6 +70,7 @@ Climate/Weather
|
||||||
---------------
|
---------------
|
||||||
|
|
||||||
* `Australian Weather <http://www.bom.gov.au/climate/dwo/>`_
|
* `Australian Weather <http://www.bom.gov.au/climate/dwo/>`_
|
||||||
|
* `Aviation Weather Center - Consistent, timely and accurate weather information for the world airspace system <https://aviationweather.gov/adds/dataserver>`_
|
||||||
* `Brazilian Weather - Historical data (In Portuguese) <http://sinda.crn2.inpe.br/PCD/SITE/novo/site/>`_
|
* `Brazilian Weather - Historical data (In Portuguese) <http://sinda.crn2.inpe.br/PCD/SITE/novo/site/>`_
|
||||||
* `Canadian Meteorological Centre <http://weather.gc.ca/grib/index_e.html>`_
|
* `Canadian Meteorological Centre <http://weather.gc.ca/grib/index_e.html>`_
|
||||||
* `Climate Data from UEA (updated monthly) <https://crudata.uea.ac.uk/cru/data/temperature/#datter>`_ and `FTP <ftp://ftp.cmdl.noaa.gov/>`_
|
* `Climate Data from UEA (updated monthly) <https://crudata.uea.ac.uk/cru/data/temperature/#datter>`_ and `FTP <ftp://ftp.cmdl.noaa.gov/>`_
|
||||||
|
@ -126,6 +122,7 @@ Computer Networks
|
||||||
* `CommonCrawl Web Data over 7 years <http://commoncrawl.org/the-data/get-started/>`_
|
* `CommonCrawl Web Data over 7 years <http://commoncrawl.org/the-data/get-started/>`_
|
||||||
* `CRAWDAD Wireless datasets from Dartmouth Univ. <https://crawdad.cs.dartmouth.edu/>`_
|
* `CRAWDAD Wireless datasets from Dartmouth Univ. <https://crawdad.cs.dartmouth.edu/>`_
|
||||||
* `Criteo click-through data <http://labs.criteo.com/2015/03/criteo-releases-its-new-dataset/>`_
|
* `Criteo click-through data <http://labs.criteo.com/2015/03/criteo-releases-its-new-dataset/>`_
|
||||||
|
* `OONI: Open Observatory of Network Interference - Internet censorship data <https://ooni.torproject.org/data/>`_
|
||||||
* `Open Mobile Data by MobiPerf <https://console.developers.google.com/storage/openmobiledata_public/>`_
|
* `Open Mobile Data by MobiPerf <https://console.developers.google.com/storage/openmobiledata_public/>`_
|
||||||
* `Rapid7 Sonar Internet Scans <https://sonar.labs.rapid7.com/>`_
|
* `Rapid7 Sonar Internet Scans <https://sonar.labs.rapid7.com/>`_
|
||||||
* `UCSD Network Telescope, IPv4 /8 net <http://www.caida.org/projects/network_telescope/>`_
|
* `UCSD Network Telescope, IPv4 /8 net <http://www.caida.org/projects/network_telescope/>`_
|
||||||
|
@ -148,19 +145,35 @@ Data Challenges
|
||||||
* `Kaggle Competition Data <https://www.kaggle.com/>`_
|
* `Kaggle Competition Data <https://www.kaggle.com/>`_
|
||||||
* `KDD Cup by Tencent 2012 <http://www.kddcup2012.org/>`_
|
* `KDD Cup by Tencent 2012 <http://www.kddcup2012.org/>`_
|
||||||
* `Localytics Data Visualization Challenge <https://github.com/localytics/data-viz-challenge>`_
|
* `Localytics Data Visualization Challenge <https://github.com/localytics/data-viz-challenge>`_
|
||||||
* `Netflix Prize <http://www.netflixprize.com/leaderboard>`_
|
* `Netflix Prize <http://netflixprize.com/leaderboard.html>`_
|
||||||
* `Space Apps Challenge <https://2015.spaceappschallenge.org>`_
|
* `Space Apps Challenge <https://2015.spaceappschallenge.org>`_
|
||||||
* `Telecom Italia Big Data Challenge <https://dandelion.eu/datamine/open-big-data/>`_
|
* `Telecom Italia Big Data Challenge <https://dandelion.eu/datamine/open-big-data/>`_
|
||||||
* `Yelp Dataset Challenge <http://www.yelp.com/dataset_challenge>`_
|
* `Yelp Dataset Challenge <http://www.yelp.com/dataset_challenge>`_
|
||||||
* `Bruteforce Database <https://github.com/duyetdev/bruteforce-database>`_
|
* `Bruteforce Database <https://github.com/duyetdev/bruteforce-database>`_
|
||||||
|
* `TravisTorrent Dataset - MSR'2017 Mining Challenge <https://travistorrent.testroots.org/>`_
|
||||||
|
|
||||||
|
|
||||||
|
Earth Science
|
||||||
|
-------------
|
||||||
|
|
||||||
|
* `AQUASTAT - Global water resources and uses <http://www.fao.org/nr/water/aquastat/data/query/index.html?lang=en>`_
|
||||||
|
* `BODC - marine data of ~22K vars <http://www.bodc.ac.uk/data/where_to_find_data/>`_
|
||||||
|
* `Earth Models <http://www.earthmodels.org/>`_
|
||||||
|
* `EOSDIS - NASA's earth observing system data <http://sedac.ciesin.columbia.edu/data/sets/browse>`_
|
||||||
|
* `Integrated Marine Observing System (IMOS) - roughly 30TB of ocean measurements <https://imos.aodn.org.au>`_ or `on S3 <http://imos-data.s3-website-ap-southeast-2.amazonaws.com/>`_
|
||||||
|
* `Marinexplore - Open Oceanographic Data <http://marinexplore.org/>`_
|
||||||
|
* `Smithsonian Institution Global Volcano and Eruption Database <http://volcano.si.edu/>`_
|
||||||
|
* `USGS Earthquake Archives <http://earthquake.usgs.gov/earthquakes/search/>`_
|
||||||
|
|
||||||
|
|
||||||
Economics
|
Economics
|
||||||
---------
|
---------
|
||||||
|
|
||||||
* `American Economic Ass (AEA) <https://www.aeaweb.org/RFE/toc.php?show=complete>`_
|
* `American Economic Association (AEA) <https://www.aeaweb.org/resources/data>`_
|
||||||
* `EconData from UMD <http://inforumweb.umd.edu/econdata/econdata.html>`_
|
* `EconData from UMD <http://inforumweb.umd.edu/econdata/econdata.html>`_
|
||||||
* `Economic Freedom of the World Data <http://www.freetheworld.com/datasets_efw.html>`_
|
* `Economic Freedom of the World Data <http://www.freetheworld.com/datasets_efw.html>`_
|
||||||
* `Historical MacroEconomic Statistics <http://www.historicalstatistics.org/>`_
|
* `Historical MacroEconomic Statistics <http://www.historicalstatistics.org/>`_
|
||||||
|
* `International Economics Database <http://widukind.cepremap.org/>`_ and `various data tools <https://github.com/Widukind>`_
|
||||||
* `International Trade Statistics <http://www.econostatistics.co.za/>`_
|
* `International Trade Statistics <http://www.econostatistics.co.za/>`_
|
||||||
* `Internet Product Code Database <http://www.upcdatabase.com/>`_
|
* `Internet Product Code Database <http://www.upcdatabase.com/>`_
|
||||||
* `Joint External Debt Data Hub <http://www.jedh.org/>`_
|
* `Joint External Debt Data Hub <http://www.jedh.org/>`_
|
||||||
|
@ -188,13 +201,18 @@ Energy
|
||||||
* `BLUEd <http://nilm.cmubi.org/>`_
|
* `BLUEd <http://nilm.cmubi.org/>`_
|
||||||
* `COMBED <http://combed.github.io/>`_
|
* `COMBED <http://combed.github.io/>`_
|
||||||
* `Dataport <https://dataport.pecanstreet.org/>`_
|
* `Dataport <https://dataport.pecanstreet.org/>`_
|
||||||
|
* `DRED <http://www.st.ewi.tudelft.nl/~akshay/dred/>`_
|
||||||
* `ECO <http://www.vs.inf.ethz.ch/res/show.html?what=eco-data>`_
|
* `ECO <http://www.vs.inf.ethz.ch/res/show.html?what=eco-data>`_
|
||||||
* `EIA <http://www.eia.gov/electricity/data/eia923/>`_
|
* `EIA <http://www.eia.gov/electricity/data/eia923/>`_
|
||||||
|
* `HES <http://randd.defra.gov.uk/Default.aspx?Menu=Menu&Module=More&Location=None&ProjectID=17359&FromSearch=Y&Publisher=1&SearchText=EV0702&SortString=ProjectCode&SortOrder=Asc&Paging=10#Description>`_ - Household Electricity Study, UK
|
||||||
* `HFED <http://hfed.github.io/>`_
|
* `HFED <http://hfed.github.io/>`_
|
||||||
* `iAWE <http://iawe.github.io/>`_
|
* `iAWE <http://iawe.github.io/>`_
|
||||||
* `Plaid <http://plaidplug.com/>`_
|
* `PLAID <http://plaidplug.com/>`_ - the Plug Load Appliance Identification Dataset
|
||||||
* `REDD <http://redd.csail.mit.edu/>`_
|
* `REDD <http://redd.csail.mit.edu/>`_
|
||||||
* `UK-Dale <http://www.doc.ic.ac.uk/~dk3810/data/>`_
|
* `Tracebase <https://www.tracebase.org>`_
|
||||||
|
* `UK-DALE <http://www.doc.ic.ac.uk/~dk3810/data/>`_ - UK Domestic Appliance-Level Electricity
|
||||||
|
* `WHITED <http://nilmworkshop.org/2016/proceedings/Poster_ID18.pdf>`_
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Finance
|
Finance
|
||||||
|
@ -209,23 +227,13 @@ Finance
|
||||||
* `Quandl <https://www.quandl.com/>`_
|
* `Quandl <https://www.quandl.com/>`_
|
||||||
* `St Louis Federal <https://research.stlouisfed.org/fred2/>`_
|
* `St Louis Federal <https://research.stlouisfed.org/fred2/>`_
|
||||||
* `Yahoo Finance <http://finance.yahoo.com/>`_
|
* `Yahoo Finance <http://finance.yahoo.com/>`_
|
||||||
* `NYSE Market Data <ftp://ftp.nyxdata.com>`_
|
* `NYSE Market Data <ftp://ftp.nyxdata.com>`_ (see FTP link on `RAW <https://raw.githubusercontent.com/caesar0301/awesome-public-datasets/master/README.rst>`_)
|
||||||
|
|
||||||
|
|
||||||
Geology
|
GIS
|
||||||
-------
|
---
|
||||||
|
|
||||||
* `Earth Models <http://www.earthmodels.org/>`_
|
|
||||||
* `Smithsonian Institution Global Volcano and Eruption Database <http://volcano.si.edu/>`_
|
|
||||||
* `USGS Earthquake Archives <http://earthquake.usgs.gov/earthquakes/search/>`_
|
|
||||||
|
|
||||||
|
|
||||||
GIS/Environment
|
|
||||||
---------------
|
|
||||||
|
|
||||||
* `BODC - marine data of ~22K vars <http://www.bodc.ac.uk/data/where_to_find_data/>`_
|
|
||||||
* `Cambridge, MA, US, GIS data on GitHub <http://cambridgegis.github.io/gisdata.html>`_
|
* `Cambridge, MA, US, GIS data on GitHub <http://cambridgegis.github.io/gisdata.html>`_
|
||||||
* `EOSDIS - NASA's earth observing system data <http://sedac.ciesin.columbia.edu/data/sets/browse>`_
|
|
||||||
* `Factual Global Location Data <https://www.factual.com/>`_
|
* `Factual Global Location Data <https://www.factual.com/>`_
|
||||||
* `Geo Spatial Data from ASU <http://geodacenter.asu.edu/datalist/>`_
|
* `Geo Spatial Data from ASU <http://geodacenter.asu.edu/datalist/>`_
|
||||||
* `Geo Wiki Project - Citizen-driven Environmental Monitoring <http://geo-wiki.org/>`_
|
* `Geo Wiki Project - Citizen-driven Environmental Monitoring <http://geo-wiki.org/>`_
|
||||||
|
@ -233,11 +241,8 @@ GIS/Environment
|
||||||
* `GeoNames Worldwide <http://www.geonames.org/>`_
|
* `GeoNames Worldwide <http://www.geonames.org/>`_
|
||||||
* `Global Administrative Areas Database (GADM) <http://www.gadm.org/>`_
|
* `Global Administrative Areas Database (GADM) <http://www.gadm.org/>`_
|
||||||
* `Homeland Infrastructure Foundation-Level Data <https://hifld-dhs-gii.opendata.arcgis.com/>`_
|
* `Homeland Infrastructure Foundation-Level Data <https://hifld-dhs-gii.opendata.arcgis.com/>`_
|
||||||
* `Integrated Marine Observing System (IMOS) - roughly 30TB of ocean measurements <https://imos.aodn.org.au>`_ or `on S3 <http://imos-data.s3-website-ap-southeast-2.amazonaws.com/>`_
|
|
||||||
* `International Institute for Systems Analysis - GIS Datasets <http://www.iiasa.ac.at/web/home/research/modelsData/Models--Tools--Data.en.html>`_
|
|
||||||
* `Landsat 8 on AWS <https://aws.amazon.com/public-data-sets/landsat/>`_
|
* `Landsat 8 on AWS <https://aws.amazon.com/public-data-sets/landsat/>`_
|
||||||
* `List of all countries in all languages <https://github.com/umpirsky/country-list>`_
|
* `List of all countries in all languages <https://github.com/umpirsky/country-list>`_
|
||||||
* `Marinexplore - Open Oceanographic Data <http://marinexplore.org/>`_
|
|
||||||
* `National Weather Service GIS Data Portal <http://www.nws.noaa.gov/gis/>`_
|
* `National Weather Service GIS Data Portal <http://www.nws.noaa.gov/gis/>`_
|
||||||
* `Natural Earth - vectors and rasters of the world <http://www.naturalearthdata.com/>`_
|
* `Natural Earth - vectors and rasters of the world <http://www.naturalearthdata.com/>`_
|
||||||
* `OpenAddresses <http://openaddresses.io/>`_
|
* `OpenAddresses <http://openaddresses.io/>`_
|
||||||
|
@ -251,6 +256,7 @@ GIS/Environment
|
||||||
* `World boundaries from the U.S. Department of State <https://hiu.state.gov/data/data.aspx>`_
|
* `World boundaries from the U.S. Department of State <https://hiu.state.gov/data/data.aspx>`_
|
||||||
* `World countries in multiple formats <https://github.com/mledoze/countries>`_
|
* `World countries in multiple formats <https://github.com/mledoze/countries>`_
|
||||||
|
|
||||||
|
|
||||||
Government
|
Government
|
||||||
----------
|
----------
|
||||||
|
|
||||||
|
@ -281,11 +287,13 @@ Image Processing
|
||||||
* `2GB of Photos of Cats <http://137.189.35.203/WebUI/CatDatabase/catData.html>`_ or `Archive version <https://web.archive.org/web/20150520175645/http://137.189.35.203/WebUI/CatDatabase/catData.html>`_
|
* `2GB of Photos of Cats <http://137.189.35.203/WebUI/CatDatabase/catData.html>`_ or `Archive version <https://web.archive.org/web/20150520175645/http://137.189.35.203/WebUI/CatDatabase/catData.html>`_
|
||||||
* `Affective Image Classification <http://www.imageemotion.org/>`_
|
* `Affective Image Classification <http://www.imageemotion.org/>`_
|
||||||
* `Animals with attributes <http://attributes.kyb.tuebingen.mpg.de/>`_
|
* `Animals with attributes <http://attributes.kyb.tuebingen.mpg.de/>`_
|
||||||
|
* `Chars74K dataset, Character Recognition in Natural Images (both English and Kannada are available) <http://www.ee.surrey.ac.uk/CVSSP/demos/chars74k/>`_
|
||||||
* `Face Recognition Benchmark <http://www.face-rec.org/databases/>`_
|
* `Face Recognition Benchmark <http://www.face-rec.org/databases/>`_
|
||||||
* `ImageNet (in WordNet hierarchy) <http://www.image-net.org/>`_
|
* `ImageNet (in WordNet hierarchy) <http://www.image-net.org/>`_
|
||||||
* `Indoor Scene Recognition <http://web.mit.edu/torralba/www/indoor.html>`_
|
* `Indoor Scene Recognition <http://web.mit.edu/torralba/www/indoor.html>`_
|
||||||
* `International Affective Picture System, UFL <http://csea.phhp.ufl.edu/media/iapsmessage.html>`_
|
* `International Affective Picture System, UFL <http://csea.phhp.ufl.edu/media/iapsmessage.html>`_
|
||||||
* `Massive Visual Memory Stimuli, MIT <http://cvcl.mit.edu/MM/stimuli.html>`_
|
* `Massive Visual Memory Stimuli, MIT <http://cvcl.mit.edu/MM/stimuli.html>`_
|
||||||
|
* `MNIST database of handwritten digits, near 1 million examples <http://yann.lecun.com/exdb/mnist/>`_
|
||||||
* `Several Shape-from-Silhouette Datasets <http://kaiwolf.no-ip.org/3d-model-repository.html>`_
|
* `Several Shape-from-Silhouette Datasets <http://kaiwolf.no-ip.org/3d-model-repository.html>`_
|
||||||
* `Stanford Dogs Dataset <http://vision.stanford.edu/aditya86/ImageNetDogs/>`_
|
* `Stanford Dogs Dataset <http://vision.stanford.edu/aditya86/ImageNetDogs/>`_
|
||||||
* `SUN database, MIT <http://groups.csail.mit.edu/vision/SUN/hierarchy.html>`_
|
* `SUN database, MIT <http://groups.csail.mit.edu/vision/SUN/hierarchy.html>`_
|
||||||
|
@ -308,6 +316,7 @@ Machine Learning
|
||||||
* `Machine Learning Data Set Repository <http://mldata.org/>`_
|
* `Machine Learning Data Set Repository <http://mldata.org/>`_
|
||||||
* `Million Song Dataset <http://labrosa.ee.columbia.edu/millionsong/>`_
|
* `Million Song Dataset <http://labrosa.ee.columbia.edu/millionsong/>`_
|
||||||
* `More Song Datasets <http://labrosa.ee.columbia.edu/millionsong/pages/additional-datasets>`_
|
* `More Song Datasets <http://labrosa.ee.columbia.edu/millionsong/pages/additional-datasets>`_
|
||||||
|
* `New Yorker caption contest ratings <https://github.com/nextml/caption-contest-data>`_
|
||||||
* `MovieLens Data Sets <http://grouplens.org/datasets/movielens/>`_
|
* `MovieLens Data Sets <http://grouplens.org/datasets/movielens/>`_
|
||||||
* `RDataMining - "R and Data Mining" ebook data <http://www.rdatamining.com/data>`_
|
* `RDataMining - "R and Data Mining" ebook data <http://www.rdatamining.com/data>`_
|
||||||
* `Registered Meteorites on Earth <http://healthintelligence.drupalgardens.com/content/registered-meteorites-has-impacted-earth-visualized>`_
|
* `Registered Meteorites on Earth <http://healthintelligence.drupalgardens.com/content/registered-meteorites-has-impacted-earth-visualized>`_
|
||||||
|
@ -339,18 +348,43 @@ Natural Language
|
||||||
* `Flickr Personal Taxonomies <http://www.isi.edu/~lerman/downloads/flickr/flickr_taxonomies.html>`_
|
* `Flickr Personal Taxonomies <http://www.isi.edu/~lerman/downloads/flickr/flickr_taxonomies.html>`_
|
||||||
* `Freebase.com of people, places, and things <http://www.freebase.com/>`_
|
* `Freebase.com of people, places, and things <http://www.freebase.com/>`_
|
||||||
* `Google Books Ngrams (2.2TB) <https://aws.amazon.com/datasets/google-books-ngrams/>`_
|
* `Google Books Ngrams (2.2TB) <https://aws.amazon.com/datasets/google-books-ngrams/>`_
|
||||||
|
* `Google MC-AFP, generated based on the public available Gigaword dataset using Paragraph Vectors <https://github.com/google/mcafp>`_
|
||||||
* `Google Web 5gram (1TB, 2006) <https://catalog.ldc.upenn.edu/LDC2006T13>`_
|
* `Google Web 5gram (1TB, 2006) <https://catalog.ldc.upenn.edu/LDC2006T13>`_
|
||||||
* `Gutenberg eBooks List <http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs>`_
|
* `Gutenberg eBooks List <http://www.gutenberg.org/wiki/Gutenberg:Offline_Catalogs>`_
|
||||||
* `Hansards text chunks of Canadian Parliament <http://www.isi.edu/natural-language/download/hansard/>`_
|
* `Hansards text chunks of Canadian Parliament <http://www.isi.edu/natural-language/download/hansard/>`_
|
||||||
* `Machine Comprehension Test (MCTest) of text from Microsoft Research <http://research.microsoft.com/en-us/um/redmond/projects/mctest/index.html>`_
|
* `Machine Comprehension Test (MCTest) of text from Microsoft Research <http://research.microsoft.com/en-us/um/redmond/projects/mctest/index.html>`_
|
||||||
* `Machine Translation of European languages <http://statmt.org/wmt11/translation-task.html#download>`_
|
* `Machine Translation of European languages <http://statmt.org/wmt11/translation-task.html#download>`_
|
||||||
|
* `Multi-Domain Sentiment Dataset (version 2.0) <http://www.cs.jhu.edu/~mdredze/datasets/sentiment/>`_
|
||||||
|
* `Microsoft MAchine Reading COmprehension Dataset (or MS MARCO) <http://www.msmarco.org/dataset.aspx>`_
|
||||||
* `Personae Corpus <http://www.clips.uantwerpen.be/datasets/personae-corpus>`_
|
* `Personae Corpus <http://www.clips.uantwerpen.be/datasets/personae-corpus>`_
|
||||||
* `SaudiNewsNet Collection of Saudi Newspaper Articles (Arabic, 30K articles) <https://github.com/ParallelMazen/SaudiNewsNet>`_
|
* `SaudiNewsNet Collection of Saudi Newspaper Articles (Arabic, 30K articles) <https://github.com/ParallelMazen/SaudiNewsNet>`_
|
||||||
* `SMS Spam Collection in English <http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/>`_
|
* `SMS Spam Collection in English <http://www.dt.fee.unicamp.br/~tiago/smsspamcollection/>`_
|
||||||
* `USENET postings corpus of 2005~2011 <http://www.psych.ualberta.ca/~westburylab/downloads/usenetcorpus.download.html>`_
|
* `USENET postings corpus of 2005~2011 <http://www.psych.ualberta.ca/~westburylab/downloads/usenetcorpus.download.html>`_
|
||||||
* `Wikidata - Wikipedia databases <https://www.wikidata.org/wiki/Wikidata:Database_download>`_
|
* `Wikidata - Wikipedia databases <https://www.wikidata.org/wiki/Wikidata:Database_download>`_
|
||||||
* `Wikipedia Links data - 40 Million Entities in Context <https://code.google.com/p/wiki-links/downloads/list>`_
|
* `Wikipedia Links data - 40 Million Entities in Context <https://code.google.com/p/wiki-links/downloads/list>`_
|
||||||
|
* `Universal Dependencies <http://universaldependencies.org>`_
|
||||||
* `WordNet databases and tools <http://wordnet.princeton.edu/wordnet/download/>`_
|
* `WordNet databases and tools <http://wordnet.princeton.edu/wordnet/download/>`_
|
||||||
|
* `Open Multilingual Wordnet <http://compling.hss.ntu.edu.sg/omw/>`_
|
||||||
|
* `Automatic Keyphrase Extraction <https://github.com/snkim/AutomaticKeyphraseExtraction/>`_
|
||||||
|
|
||||||
|
|
||||||
|
Neuroscience
|
||||||
|
-------------
|
||||||
|
|
||||||
|
* `Allen Institute Datasets <http://www.brain-map.org/>`_
|
||||||
|
* `Brain Catalogue <http://braincatalogue.org/>`_
|
||||||
|
* `Brainomics <http://brainomics.cea.fr/localizer>`_
|
||||||
|
* `CodeNeuro Datasets <http://datasets.codeneuro.org/>`_
|
||||||
|
* `Collaborative Research in Computational Neuroscience (CRCNS) <http://crcns.org/data-sets>`_
|
||||||
|
* `FCP-INDI <http://fcon_1000.projects.nitrc.org/index.html>`_
|
||||||
|
* `Human Connectome Project <http://www.humanconnectome.org/data/>`_
|
||||||
|
* `NDAR <https://ndar.nih.gov/>`_
|
||||||
|
* `NIMH Data Archive <http://data-archive.nimh.nih.gov/>`_
|
||||||
|
* `NeuroData <http://neurodata.io>`_
|
||||||
|
* `OASIS <http://www.oasis-brains.org/>`_
|
||||||
|
* `OpenfMRI <https://openfmri.org/>`_
|
||||||
|
* `Neuroelectro <http://neuroelectro.org/>`_
|
||||||
|
* `Study Forrest <http://studyforrest.org>`_
|
||||||
|
|
||||||
|
|
||||||
Physics
|
Physics
|
||||||
|
@ -383,6 +417,7 @@ Public Domains
|
||||||
* `Infochimps <http://www.infochimps.com/>`_
|
* `Infochimps <http://www.infochimps.com/>`_
|
||||||
* `KDNuggets Data Collections <http://www.kdnuggets.com/datasets/index.html>`_
|
* `KDNuggets Data Collections <http://www.kdnuggets.com/datasets/index.html>`_
|
||||||
* `Microsoft Azure Data Market Free DataSets <http://datamarket.azure.com/browse/data?price=free>`_
|
* `Microsoft Azure Data Market Free DataSets <http://datamarket.azure.com/browse/data?price=free>`_
|
||||||
|
* `Microsoft Data Science for Research <http://aka.ms/Data-Science>`_
|
||||||
* `Numbrary <http://numbrary.com/>`_
|
* `Numbrary <http://numbrary.com/>`_
|
||||||
* `Open Library Data Dumps <https://openlibrary.org/developers/dumps>`_
|
* `Open Library Data Dumps <https://openlibrary.org/developers/dumps>`_
|
||||||
* `Reddit Datasets <https://www.reddit.com/r/datasets>`_
|
* `Reddit Datasets <https://www.reddit.com/r/datasets>`_
|
||||||
|
@ -424,7 +459,6 @@ Social Networks
|
||||||
* `Facebook Data Scrape (2005) <https://archive.org/details/oxford-2005-facebook-matrix>`_
|
* `Facebook Data Scrape (2005) <https://archive.org/details/oxford-2005-facebook-matrix>`_
|
||||||
* `Facebook Social Networks from LAW (since 2007) <http://law.di.unimi.it/datasets.php>`_
|
* `Facebook Social Networks from LAW (since 2007) <http://law.di.unimi.it/datasets.php>`_
|
||||||
* `Foursquare from UMN/Sarwat (2013) <https://archive.org/details/201309_foursquare_dataset_umn>`_
|
* `Foursquare from UMN/Sarwat (2013) <https://archive.org/details/201309_foursquare_dataset_umn>`_
|
||||||
* `GetGlue - users rating TV shows <http://getglue-data.s3.amazonaws.com/getglue_sample.tar.gz>`_
|
|
||||||
* `GitHub Collaboration Archive <https://www.githubarchive.org/>`_
|
* `GitHub Collaboration Archive <https://www.githubarchive.org/>`_
|
||||||
* `Google Scholar citation relations <http://www3.cs.stonybrook.edu/~leman/data/gscholar.db>`_
|
* `Google Scholar citation relations <http://www3.cs.stonybrook.edu/~leman/data/gscholar.db>`_
|
||||||
* `High-Resolution Contact Networks from Wearable Sensors <http://www.sociopatterns.org/datasets/>`_
|
* `High-Resolution Contact Networks from Wearable Sensors <http://www.sociopatterns.org/datasets/>`_
|
||||||
|
@ -465,6 +499,7 @@ Social Sciences
|
||||||
* `International Studies Compendium Project <http://www.isacompendium.com/public/>`_
|
* `International Studies Compendium Project <http://www.isacompendium.com/public/>`_
|
||||||
* `James McGuire Cross National Data <http://jmcguire.faculty.wesleyan.edu/welcome/cross-national-data/>`_
|
* `James McGuire Cross National Data <http://jmcguire.faculty.wesleyan.edu/welcome/cross-national-data/>`_
|
||||||
* `MacroData Guide by Norsk samfunnsvitenskapelig datatjeneste <http://nsd.uib.no>`_
|
* `MacroData Guide by Norsk samfunnsvitenskapelig datatjeneste <http://nsd.uib.no>`_
|
||||||
|
* `Minnesota Population Center <https://www.ipums.org/>`_
|
||||||
* `MIT Reality Mining Dataset <http://realitycommons.media.mit.edu/realitymining.html>`_
|
* `MIT Reality Mining Dataset <http://realitycommons.media.mit.edu/realitymining.html>`_
|
||||||
* `Open Crime and Policing Data in England, Wales and Northern Ireland <https://data.police.uk/data/>`_
|
* `Open Crime and Policing Data in England, Wales and Northern Ireland <https://data.police.uk/data/>`_
|
||||||
* `Paul Hensel General International Data Page <http://www.paulhensel.org/dataintl.html>`_
|
* `Paul Hensel General International Data Page <http://www.paulhensel.org/dataintl.html>`_
|
||||||
|
@@ -474,8 +509,9 @@ Social Sciences
|
||||||
* `StackExchange Data Explorer <http://data.stackexchange.com/help>`_
|
* `StackExchange Data Explorer <http://data.stackexchange.com/help>`_
|
||||||
* `Terrorism Research and Analysis Consortium <http://www.trackingterrorism.org/>`_
|
* `Terrorism Research and Analysis Consortium <http://www.trackingterrorism.org/>`_
|
||||||
* `Texas Inmates Executed Since 1984 <http://www.tdcj.state.tx.us/death_row/dr_executed_offenders.html>`_
|
* `Texas Inmates Executed Since 1984 <http://www.tdcj.state.tx.us/death_row/dr_executed_offenders.html>`_
|
||||||
* `Titanic Survival Data Set <https://github.com/caesar0301/awesome-public-datasets/tree/master/Datasets>`_
|
* `Titanic Survival Data Set <https://github.com/caesar0301/awesome-public-datasets/tree/master/Datasets>`_ or `on Kaggle <https://www.kaggle.com/c/titanic/data>`_
|
||||||
* `UCB's Archive of Social Science Data (D-Lab) <http://ucdata.berkeley.edu/>`_
|
* `UCB's Archive of Social Science Data (D-Lab) <http://ucdata.berkeley.edu/>`_
|
||||||
|
* `Uppsala Conflict Data Program <http://ucdp.uu.se/>`_
|
||||||
* `UCLA Social Sciences Data Archive <http://dataarchives.ss.ucla.edu/Home.DataPortals.htm>`_
|
* `UCLA Social Sciences Data Archive <http://dataarchives.ss.ucla.edu/Home.DataPortals.htm>`_
|
||||||
* `UN Civil Society Database <http://esango.un.org/civilsociety/>`_
|
* `UN Civil Society Database <http://esango.un.org/civilsociety/>`_
|
||||||
* `Universities Worldwide <http://univ.cc/>`_
|
* `Universities Worldwide <http://univ.cc/>`_
|
||||||
|
@@ -533,7 +569,7 @@ Transportation
|
||||||
* `RITA Airline On-Time Performance data <http://www.transtats.bts.gov/Tables.asp?DB_ID=120>`_
|
* `RITA Airline On-Time Performance data <http://www.transtats.bts.gov/Tables.asp?DB_ID=120>`_
|
||||||
* `RITA/BTS transport data collection (TranStats) <http://www.transtats.bts.gov/DataIndex.asp>`_
|
* `RITA/BTS transport data collection (TranStats) <http://www.transtats.bts.gov/DataIndex.asp>`_
|
||||||
* `Toronto Bike Share Stations (XML file) <http://www.bikesharetoronto.com/data/stations/bikeStations.xml>`_
|
* `Toronto Bike Share Stations (XML file) <http://www.bikesharetoronto.com/data/stations/bikeStations.xml>`_
|
||||||
* `Transport for London (TFL) <https://tfl.gov.uk/info-for/open-data-users/our-feeds>`_
|
* `Transport for London (TFL) <https://tfl.gov.uk/info-for/open-data-users/data-feeds>`_
|
||||||
* `Travel Tracker Survey (TTS) for Chicago <http://www.cmap.illinois.gov/data/transportation/travel-tracker-survey>`_
|
* `Travel Tracker Survey (TTS) for Chicago <http://www.cmap.illinois.gov/data/transportation/travel-tracker-survey>`_
|
||||||
* `U.S. Bureau of Transportation Statistics (BTS) <http://www.rita.dot.gov/bts/>`_
|
* `U.S. Bureau of Transportation Statistics (BTS) <http://www.rita.dot.gov/bts/>`_
|
||||||
* `U.S. Domestic Flights 1990 to 2009 <http://academictorrents.com/details/a2ccf94bbb4af222bf8e69dad60a68a29f310d9a>`_
|
* `U.S. Domestic Flights 1990 to 2009 <http://academictorrents.com/details/a2ccf94bbb4af222bf8e69dad60a68a29f310d9a>`_
|
||||||
|
@@ -551,4 +587,3 @@ Complementary Collections
|
||||||
* Quora: `Where can I find large datasets open to the public? <http://www.quora.com/Where-can-I-find-large-datasets-open-to-the-public>`_
|
* Quora: `Where can I find large datasets open to the public? <http://www.quora.com/Where-can-I-find-large-datasets-open-to-the-public>`_
|
||||||
* RS.io: `100+ Interesting Data Sets for Statistics <http://rs.io/100-interesting-data-sets-for-statistics/>`_
|
* RS.io: `100+ Interesting Data Sets for Statistics <http://rs.io/100-interesting-data-sets-for-statistics/>`_
|
||||||
* StaTrek: `Leveraging open data to understand urban lives <http://xiaming.me/posts/2014/10/23/leveraging-open-data-to-understand-urban-lives/>`_
|
* StaTrek: `Leveraging open data to understand urban lives <http://xiaming.me/posts/2014/10/23/leveraging-open-data-to-understand-urban-lives/>`_
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user