Question: Simple $avg Query for Nodejs

Question

Simple $avg Query for Nodejs

Answers 1
Added at 2017-01-03 04:01
Tags
Question

This are all the codes that i have to make the app. So i am able to eitherimport the data/records from a csv file or insert manually from the app at localhost:3000 and it will be inserted into the database. From there on i will be able to query from here. The error lies in server.js at the bottom. Both the bulk and the avg dont seem to have executed.

server.js which include the bulk (for 1000data and more) and average query(air_temperature)

var express = require('express');
var app = express();
var mongojs = require('mongojs');
var db = mongojs('meibanlist', ['meibanlist']);
var bodyParser = require('body-parser');

app.use(express.static(__dirname + '/public'));
app.use(bodyParser.json());

app.get('/meibanlist', function (req, res) {
  console.log('I received a GET request');

  db.meibanlist.find(function (err, docs) {
    console.log(docs);
    res.json(docs);
  });
});

app.post('/meibanlist', function (req, res) {
  console.log(req.body);
  db.meibanlist.insert(req.body, function(err, doc) {
    res.json(doc);
  });
});

app.delete('/meibanlist/:id', function (req, res) {
  var id = req.params.id;
  console.log(id);
  db.meibanlist.remove({_id: mongojs.ObjectId(id)}, function (err, doc) {
    res.json(doc);
  });
});

app.get('/meibanlist/:id', function (req, res) {
  var id = req.params.id;
  console.log(id);
  db.meibanlist.findOne({_id: mongojs.ObjectId(id)}, function (err, doc) {
    res.json(doc);
  });
});

app.put('/meibanlist/:id', function (req, res) {
  var id = req.params.id;
  console.log(req.body.machine_unit);
  db.meibanlist.findAndModify({
    query: {_id: mongojs.ObjectId(id)},
    update: {$set: {machine_unit: req.body.machine_unit, air_temperature: req.body.air_temperature, water_temperature: req.body.water_temperature, heat_temperature: req.body.heat_temperature, room_temperature: req.body.room_temperature, date: req.body.date, time: req.body.time}},
    new: true}, function (err, doc) {
      res.json(doc);
    }
  );
});

var cursor = db.meibanlist.find({"air_temperature": { "$exists": true, "$type": 2 }}),
    bulkUpdateOps = [];

cursor.forEach(function(doc){ 
    var newTemp = parseInt(doc.air_temperature, 10);
    bulkUpdateOps.push({ 
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "air_temperature": newTemp } }
         }
    });

    if (bulkUpdateOps.length === 500) {
        db.meibanlist.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});         

if (bulkUpdateOps.length > 0) { db.meibanlist.bulkWrite(bulkUpdateOps); }

db.meibanlist.aggregate([
    {
        "$group": {
            "_id": null,
            "averageAirTemperature": { "$avg": "$air_temperature" } 
        }
    }
], function(err, results){
    if (err || !results) console.log ("record not found");
    else console.log(results[0]["averageAirTemperature"]);        
}); 

app.listen(3000);
console.log("Server running on port 3000");

index.html

<!DOCTYPE>
<html ng-app="myApp">
<head>
<!-- Latest compiled and minified CSS -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap.min.css">

<!-- Optional theme -->
<link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.1/css/bootstrap-theme.min.css">

  <title>Meiban App</title>
</head>
<body>
  <div class="container" ng-controller="AppCtrl">
    <h1>Meiban App</h1>

    <table class="table">
      <thead>
        <tr>
          <th>Machine unit</th>         
          <th>Air Temperature</th>
          <th>Water Temperature</th>
          <th>Heat Temperature</th>       
          <th>Room Temperature</th>
          <th>Date</th>
          <th>Time</th>
          <th>Action</th>

          <th>&nbsp;</th>
        </tr>
      </thead>
      <tbody>
        <tr>
          <td><input class="form-control" ng-model="contact.machine_unit"></td>
          <td><input class="form-control" ng-model="contact.air_temperature"></td>
          <td><input class="form-control" ng-model="contact.water_temperature"></td>
          <td><input class="form-control" ng-model="contact.heat_temperature"></td>
          <td><input class="form-control" ng-model="contact.room_temperature"></td>
          <td><input class="form-control" ng-model="contact.date"></td>
          <td><input class="form-control" ng-model="contact.time"></td>
          <td><button class="btn btn-primary" ng-click="addCollection()">Add Collection</button></td>
          <td><button class="btn btn-info" ng-click="update()">Update</button>&nbsp;&nbsp;<button class="btn btn-info" ng-click="deselect()">Clear</button></td>
        </tr>
        <tr ng-repeat="contact in collection">
          <td>{{contact.machine_unit}}</td>
          <td>{{contact.air_temperature}}</td>
          <td>{{contact.water_temperature}}</td>
          <td>{{contact.heat_temperature}}</td>
          <td>{{contact.room_temperature}}</td>
          <td>{{contact.date}}</td>
          <td>{{contact.time}}</td>
          <td><button class="btn btn-danger" ng-click="remove(contact._id)">Remove</button></td>
          <td><button class="btn btn-warning" ng-click="edit(contact._id)">Edit</button></td>
        </tr>
      </tbody>
    </table>

  </div>
<script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.3.12/angular.min.js"></script>
<script src="controller/controller.js"></script>
</body>
</html>

controller.js

var myApp = angular.module('myApp', []);
myApp.controller('AppCtrl', ['$scope', '$http', function($scope, $http) {
    console.log("Hello World from controller");


var refresh = function() {
  $http.get('/meibanlist').success(function(response) {
    console.log("I got the data I requested");
    $scope.meibanlist = response;
    $scope.contact = "";
  });
};

refresh();

$scope.addCollection = function() {
  console.log($scope.contact);
  $http.post('/meibanlist', $scope.contact).success(function(response) {
    console.log(response);
    refresh();
  });
};

$scope.remove = function(id) {
  console.log(id);
  $http.delete('/meibanlist/' + id).success(function(response) {
    refresh();
  });
};

$scope.edit = function(id) {
  console.log(id);
  $http.get('/meibanlist/' + id).success(function(response) {
    $scope.contact = response;
  });
};  

$scope.update = function() {
  console.log($scope.contact._id);
  $http.put('/meibanlist/' + $scope.contact._id, $scope.contact).success(function(response) {
    refresh();
  })
};

$scope.deselect = function() {
  $scope.contact = "";
}

}]);
Answers
nr: #1 dodano: 2017-01-03 11:01

As the documentation points out, the $avg operator works in two pipeline steps, the $project and $group stages. I believe you need to use the $avg accumulator in the $group pipeline since you want the average of all the prices.

When used in the $group stage, $avg returns the collective average of all the numeric values that result from applying a specified expression to each document in a group of documents that share the same group by key noted by the _id field.

The _id field is mandatory; however, since you want to calculate accumulated average values for all the input documents as a whole you can specify an _id value of null:

db.database.aggregate([
    {
        "$group": {
            "_id": null,
            "Average": { "$avg": "$price" } 
        }
    }
], function(err, results){
    if (err !results) console.log ("record not found");
    else console.log(results[0]["Average"]);        
});

Collating info from the comments and subsequent updates to your question, you are hitting a snag because the values in your are not numerical hence the reason why $avg is not working.

You would need to convert the string values to numerical ones by using the update() method. The concept here is to loop through your collection with a cursor and for each document within the cursor, update the document using $set with the new values from parseInt().

Supposing your collection is not that humongous, the intuition above can be implemented using the forEach() method of the cursor to iterate it and update each document in the collection that matches a certain criteria.

The following mongo shell demonstration highlights this approach for small datasets:

mongo shell

db.meibanlist.find({"air_temperature": { "$exists": true, "$type": 2 }})
    .snapshot()
    .forEach(function(doc){ 
        var newTemp = parseInt(doc.air_temperature, 10);
        db.meibanlist.updateOne(
            { "_id": doc._id },
            { "$set": { "air_temperature": newTemp } }
        );
    });

Now for improved performance especially when dealing with large collections, take advantage of using a Bulk() API for updating the collection in bulk.

This is quite efficient as opposed to the above operations because with the bulk API you will be sending the operations to the server in batches (for example, say a batch size of 500) which gives you much better performance since you won't be sending every request to the server but just once in every 500 requests, thus making your updates more efficient and quicker.

The following examples demonstrate using the Bulk() API available in MongoDB versions >= 2.6 and < 3.2.

mongo shell

var bulkUpdateOps = db.meibanlist.initializeUnOrderedBulkOp(),   
    counter = 0;

db.meibanlist.find({"air_temperature": { "$exists": true, "$type": 2 }})
    .snapshot()
    .forEach(function(doc){ 
        var newTemp = parseInt(doc.air_temperature, 10);
        bulkUpdateOps.find({ "_id": doc._id })
            .update({ "$set": { "air_temperature": newTemp } })

        counter++;  // increment counter for batch limit
        if (counter % 500 === 0) { 
            // execute the bulk update operation in batches of 1000
            bulkUpdateOps.execute(); 
            // Re-initialize the bulk update operations object
            bulkUpdateOps = db.meibanlist.initializeUnOrderedBulkOp();
        } 
})

// Clean up remaining operation in the queue
if (counter % 500 !== 0) { bulkUpdateOps.execute(); }

The next example applies to MongoDB versions 3.2 and newer which has since deprecated the Bulk() API and provided a newer set of apis using bulkWrite().

It uses the same cursors as above but creates the arrays with the bulk operations using the same forEach() cursor method to push each bulk write document to the array. Because write commands can accept no more than 1000 operations, you will need to group your operations to have at most 1000 operations and re-intialise the array when loop hit the 1000 iteration. The counter variable above is there to manage your bulk updates effectively if your collection is large. It allows you to batch the update operations and sends the writes to the server in batches of 500 which gives you a better performance as you are not sending every request to the server, just once in every 500 requests.

For bulk operations MongoDB imposes a default internal limit of 1000 operations per batch and so the choice of 500 documents is good in the sense that you have some control over the batch size rather than let MongoDB impose the default, i.e. for larger operations in the magnitude of > 1000 documents.

var cursor = db.meibanlist.find({"air_temperature": { "$exists": true, "$type": 2 }}),
    bulkUpdateOps = [];

cursor.forEach(function(doc){ 
    var newTemp = parseInt(doc.air_temperature, 10);
    bulkUpdateOps.push({ 
        "updateOne": {
            "filter": { "_id": doc._id },
            "update": { "$set": { "air_temperature": newTemp } }
         }
    });

    if (bulkUpdateOps.length === 500) {
        db.meibanlist.bulkWrite(bulkUpdateOps);
        bulkUpdateOps = [];
    }
});         

if (bulkUpdateOps.length > 0) { db.meibanlist.bulkWrite(bulkUpdateOps); }

Once you update the collection you can then happily apply the $avg operator in your aggregate() function. The following will return the average air_temperature for all the documents combined:

db.meibanlist.aggregate([
    {
        "$group": {
            "_id": null,
            "averageAirTemperature": { "$avg": "$air_temperature" } 
        }
    }
], function(err, results){
    if (err !results) console.log ("record not found");
    else console.log(results[0]["averageAirTemperature"]);        
}); 

UPDATE

The above code works in mongo shell. For a mongojs version, encapsulate the operations within functions that you can call whenever the need arises. For example, you can place the bulk update logic within its own function bulkUpdate:

function bulkUpdate(collection, callback)
    var ops = [];

    collection.find({
        "air_temperature": { "$exists": true, "$type": 2 }
    }, function (err, docs) {
        docs.forEach(function(doc){ 
            ops.push({ 
                "updateOne": {
                    "filter": { "_id": doc._id },
                    "update": { 
                        "$set": { 
                            "air_temperature": parseInt(doc.air_temperature, 10),
                            "water_temperature": parseInt(doc.water_temperature, 10),
                            "heat_temperature": parseInt(doc.heat_temperature, 10),
                            "room_temperature": parseInt(doc.room_temperature, 10)
                        } 
                    }
                 }
            });

            if (ops.length === 500) {
                collection.bulkWrite(bulkUpdateOps, function(err, result){
                    if (err) callback(err);
                    callback(null, result);
                });
                ops = [];
            }
        });         

        if (ops.length > 0) { 
            collection.bulkWrite(ops, function(err, result){
                callback(null, result);
            }); 
        }
    });
}

and the logic to get the average temperatures:

function getAverageTemps(collection, callback) {
    collection.aggregate([
        {
            "$group": {
                "_id": null,
                "averageAirTemperature": { "$avg": "$air_temperature" },
                "averageWaterTemperature": { "$avg": "$water_temperature" },
                "averageHeatTemperature": { "$avg": "$heat_temperature" },
                "averageRoomTemperature": { "$avg": "$room_temperature" }
            }
        }
    ], function(err, results){
        if (err || !results) callback(err);
        else callback(null, results);        
    }); 
}

which you can then call as follows, for example you can put an endpoint in your API that retrieves the average temps:

app.get('/meibanlist/averagetemps', function (req, res) {
    getAverageTemps(db.meibanlist, function (err, temps) {
        if (err || !temps) console.log ("record not found");
        else res.json(temps);
    });
});
Source Show
◀ Wstecz