How does MongoDB index arrays?

IndexingMongodb

Indexing Problem Overview


In MongoDB, if I were to store an array (say ["red", "blue"]) in a field "color", does it index "red" and "blue" so I could query for "red", for example, or does in make {"red", "blue"} a composite index?

Indexing Solutions


Solution 1 - Indexing

When it comes to indexing arrays, MongoDB indexes each value of the array so you can query for individual items, such as "red." For example:

> db.col1.save({'colors': ['red','blue']})
> db.col1.ensureIndex({'colors':1})

> db.col1.find({'colors': 'red'})
{ "_id" : ObjectId("4ccc78f97cf9bdc2a2e54ee9"), "colors" : [ "red", "blue" ] }
> db.col1.find({'colors': 'blue'})
{ "_id" : ObjectId("4ccc78f97cf9bdc2a2e54ee9"), "colors" : [ "red", "blue" ] }

For more information, check out MongoDB's documentation on Multikeys: http://www.mongodb.org/display/DOCS/Multikeys

Solution 2 - Indexing

You can simply test index usage by appending "explain" to your query:

> db.col1.save({'colors': ['red','blue']})

# without index
> db.col1.find({'colors': 'red'}).explain()
{
        "queryPlanner" : {
                "plannerVersion" : 1,
                "namespace" : "protrain.col1",
                "indexFilterSet" : false,
                "parsedQuery" : {
                        "colors" : {
                                "$eq" : "red"
                        }
                },
                "winningPlan" : {
                        "stage" : "COLLSCAN", <--- simple column scan
                        "filter" : {
                                "colors" : {
                                        "$eq" : "red"
                                }
                        },
                        "direction" : "forward"
                },
                "rejectedPlans" : [ ]
        },
        "serverInfo" : {
                "host" : "bee34f15fe28",
                "port" : 27017,
                "version" : "3.4.4",
                "gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
        },
        "ok" : 1
}

# query with index
> db.col1.createIndex( { "colors":1 } )
> db.col1.find({'colors': 'red'}).explain()
{
        "queryPlanner" : {
                "plannerVersion" : 1,
                "namespace" : "protrain.col1",
                "indexFilterSet" : false,
                "parsedQuery" : {
                        "colors" : {
                                "$eq" : "red"
                        }
                },
                "winningPlan" : {
                        "stage" : "FETCH",
                        "inputStage" : {
                                "stage" : "IXSCAN", <!---- INDEX HAS BEEN USED
                                "keyPattern" : {
                                        "colors" : 1
                                },
                                "indexName" : "colors_1",
                                "isMultiKey" : true,
                                "multiKeyPaths" : {
                                        "colors" : [
                                                "colors"
                                        ]
                                },
                                "isUnique" : false,
                                "isSparse" : false,
                                "isPartial" : false,
                                "indexVersion" : 2,
                                "direction" : "forward",
                                "indexBounds" : {
                                        "colors" : [
                                                "[\"red\", \"red\"]"
                                        ]
                                }
                        }
                },
                "rejectedPlans" : [ ]
        },
        "serverInfo" : {
                "host" : "bee34f15fe28",
                "port" : 27017,
                "version" : "3.4.4",
                "gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
        },
        "ok" : 1
}

For structures where you have structured indexes, you could use the array position to index fields inside arrays:

{
	'_id': 'BB167E2D61909E848EBC96C7B33251AC',
	'hist': {
		'map': {
			'10': 1
		}
	},
	'wayPoints': [{
		'bhf_name': 'Zinsgutstr.(Berlin)',
		'ext_no': 900180542,
		'lat': 52.435158,
		'lon': 13.559086,
		'puic': 86,
		'time': {
			'dateTime': '2018-01-10T09: 38: 00',
			'offset': {
				'totalSeconds': 3600
			}
		},
		'train_name': 'Bus162'
	},
	{
		'bhf_name': 'SAdlershof(Berlin)',
		'ext_no': 900193002,
		'lat': 52.435104,
		'lon': 13.54055,
		'puic': 86,
		'time': {
			'dateTime': '2018-01-10T09: 44: 00',
			'offset': {
				'totalSeconds': 3600
			}
		},
		'train_name': 'Bus162'
	}]
}


db.col.createIndex( { "wayPoints.0.ext_no":1 } )
db.col.createIndex( { "wayPoints.0.train_name":1 } )
db.col.createIndex( { "wayPoints.1.ext_no":1 } )
db.col.createIndex( { "wayPoints.1.train_name":1 } )

> db.col.find(
... {
...  "wayPoints.ext_no": 900180542
... }
... ,
...     {
...         "wayPoints.ext_no":1,
...         "wayPoints.train_name":1,
...         "wayPoints.time":1
...     }
... ).explain()
{
        "queryPlanner" : {
                "plannerVersion" : 1,
                "namespace" : "db.col",
                "indexFilterSet" : false,
                "parsedQuery" : {
                        "wayPoints.ext_no" : {
                                "$eq" : 900180542
                        }
                },
                "winningPlan" : {
                        "stage" : "PROJECTION",
                        "transformBy" : {
                                "wayPoints.ext_no" : 1,
                                "wayPoints.train_name" : 1,
                                "wayPoints.time" : 1
                        },
                        "inputStage" : {
                                "stage" : "FETCH",
                                "inputStage" : {
                                        "stage" : "IXSCAN",
                                        "keyPattern" : {
                                                "wayPoints.ext_no" : 1
                                        },
                                        "indexName" : "wayPoints.ext_no_1",
                                        "isMultiKey" : true,
                                        "multiKeyPaths" : {
                                                "wayPoints.ext_no" : [
                                                        "wayPoints"
                                                ]
                                        },
                                        "isUnique" : false,
                                        "isSparse" : false,
                                        "isPartial" : false,
                                        "indexVersion" : 2,
                                        "direction" : "forward",
                                        "indexBounds" : {
                                                "wayPoints.ext_no" : [
                                                        "[900180542.0, 900180542.0]"
                                                ]
                                        }
                                }
                        }
                },
                "rejectedPlans" : [ ]
        },
        "serverInfo" : {
                "host" : "bee34f15fe28",
                "port" : 27017,
                "version" : "3.4.4",
                "gitVersion" : "888390515874a9debd1b6c5d36559ca86b44babd"
        },
        "ok" : 1
}

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Content TypeOriginal AuthorOriginal Content on Stackoverflow
QuestionIamICView Question on Stackoverflow
Solution 1 - IndexingCharles HooperView Answer on Stackoverflow
Solution 2 - IndexingLiteradixView Answer on Stackoverflow