Import Yelp dataset to MongoDB

Reference links

Install MongoDB

http://docs.mongodb.org/manual/tutorial/install-mongodb-on-os-x/

Using MongoDB

http://docs.mongodb.org/manual/tutorial/getting-started/

Import Yelp Dataset

 # Load each Yelp dataset file into its own collection of the "yelp" database.
 mongoimport --db yelp --collection businesses yelp_academic_dataset_business.json
 mongoimport --db yelp --collection users yelp_academic_dataset_user.json
 mongoimport --db yelp --collection reviews yelp_academic_dataset_review.json
 mongoimport --db yelp --collection checkins yelp_academic_dataset_checkin.json
 mongoimport --db yelp --collection tips yelp_academic_dataset_tip.json

Simple Queries for Exploring the Data

Get all the reviews for a particular business from the reviews collection (run this from the terminal, outside the mongo shell)
# Export the date and business_id of every review of one business as CSV.
# --type=csv replaces the deprecated --csv flag; CSV output requires --fields.
mongoexport --db yelp --collection reviews --type=csv --fields date,business_id --query '{"business_id":"rv7CY8G_XibTx82YhuqQRw"}' --out test.txt
Take a look at the businesses collection
// Fetch a single document from the businesses collection to inspect its fields.
db.businesses.findOne()
{
 "_id" : ObjectId("54d9ac6438705f91be8ec9c7"),
 "business_id" : "vcNAWiLM4dR7D2nwwJ7nCA",
"full_address" : "4840 E Indian School Rd\nSte 101\nPhoenix, AZ 85018",
 "hours" : {
 "Tuesday" : {
 "close" : "17:00",
 "open" : "08:00"
 },
 "Friday" : {
 "close" : "17:00",
 "open" : "08:00"
 },
 "Monday" : {
 "close" : "17:00",
 "open" : "08:00"
 },
 "Wednesday" : {
 "close" : "17:00",
 "open" : "08:00"
 },
 "Thursday" : {
 "close" : "17:00",
 "open" : "08:00"
 }
 },
 "open" : true,
 "categories" : [
 "Doctors",
 "Health & Medical"
 ],
 "city" : "Phoenix",
 "review_count" : 9,
 "name" : "Eric Goldberg, MD",
 "neighborhoods" : [ ],
 "longitude" : -111.983758,
 "state" : "AZ",
 "stars" : 3.5,
 "latitude" : 33.499313,
 "attributes" : {
 "By Appointment Only" : true
 },
 "type" : "business"
}
Find all restaurants that are good for dinner, have a romantic ambience, have a price range greater than or equal to three, and have more than 100 reviews
// All conditions in a single filter document are ANDed together.
// Matching a string against the "categories" array matches any element.
db.businesses.find({
  categories: "Restaurants",
  "attributes.Good For.dinner": true,
  "attributes.Ambience.romantic": true,
  review_count: { $gt: 100 },
  "attributes.Price Range": { $gte: 3 }
})
Find the reviews of romantic, good-for-dinner restaurants that have useful votes greater than or equal to 5
// Step 1: collect the business_id of every restaurant that is good for dinner,
// has a romantic ambience, more than 100 reviews and a price range of at least 3.
// The projection keeps only business_id, which is all the second step needs.
var myCursor = db.businesses.find(
  {
    categories: "Restaurants",
    "attributes.Good For.dinner": true,
    "attributes.Ambience.romantic": true,
    review_count: { $gt: 100 },
    "attributes.Price Range": { $gte: 3 }
  },
  { business_id: true, _id: false }
);

// Step 2: for each of those businesses, print its reviews with at least
// 5 useful votes ($gte, so a review with exactly 5 votes is included).
// The inner cursor is iterated explicitly: the shell only auto-displays the
// result of the last statement it evaluates, so without this the reviews for
// most businesses would never be shown.
myCursor.forEach(function (business) {
  db.reviews
    .find({ business_id: business.business_id, "votes.useful": { $gte: 5 } })
    .forEach(printjson);
});
Cross-checking the data – compare the review count stored on the business with the number of its documents in the reviews collection
// Show the business document (its "review_count" field is the stored total).
db.businesses.find({
  business_id: "WgsKcV4zZ2JnxUj9lKdn9Q"
});
// Count the review documents that reference the same business.
db.reviews.find({
  business_id: "WgsKcV4zZ2JnxUj9lKdn9Q"
}).count();
Count the reviews per business by grouping
// Group the reviews by business_id and add 1 per review to get a per-business count.
db.reviews.aggregate([
  {
    $group: {
      _id: "$business_id",
      count: { $sum: 1 }
    }
  }
])

Add a stars_distribution field to the users and businesses collections
// Set a zeroed stars_distribution sub-document on every business and every user.
// The empty filter matches all documents; multi: true applies the $set to each
// of them, and upsert: false means nothing is inserted if the collection is empty.
db.businesses.update(
  {},
  { $set: { "stars_distribution": { "one": 0, "two": 0, "three": 0, "four": 0, "five": 0 } } },
  { upsert: false, multi: true }
)
db.users.update(
  {},
  { $set: { "stars_distribution": { "one": 0, "two": 0, "three": 0, "four": 0, "five": 0 } } },
  { upsert: false, multi: true }
)

#

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s