Intro to MongDB (NoSQL)#

Requirements#

from pymongo import MongoClient
from pymongo import ASCENDING, DESCENDING
import json
import numpy as np
client = MongoClient('localhost', port=27017)

See your data bases#

client.list_database_names()
['admin',
 'config',
 'local',
 'my_database',
 'radar_tickets_db',
 'task_back_end']

Remove a data base#

# client.drop_database('my_database')
client.list_database_names()
['admin',
 'config',
 'local',
 'my_database',
 'radar_tickets_db',
 'task_back_end']

Create a new data base#

database_1 = client['my_database'] 

Create a new collection in the data base#

collection_1 = database_1['people']

Aggregate a new document into a collection from a data base#

collection_1.insert_one({'name' : 'Pepe', 'age' : 20, 'height' : 1.85, 'studies' : 'civil engineer', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['music', 'football', 'reading']})
<pymongo.results.InsertOneResult at 0x173712f2a10>

Aggregate several new documents into a collection from a data base#

collection_1.insert_many([{'name' : 'Juan', 'age' : 20, 'height' : 1.7, 'studies' : 'industrial engineer', 'country' : 'Spain', 'city' : 'Toledo', 'hobbies' : ['youtube']},
                          {'name' : 'Lucia', 'age' : 32, 'height' : 1.63, 'studies' : 'psychologist', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['techno', 'parties']},
                          {'name' : 'Garazi', 'age' : 45, 'height' : 1.55, 'studies' : 'nurse', 'country' : 'Spain', 'city' : 'Pais Vasco', 'hobbies' : ['music', 'shopping', 'hospitals']},
                          {'name' : 'Javi', 'age' : 20, 'height' : 1.65, 'studies' : 'physician', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['music', 'basket', 'TV shows']},
                          {'name' : 'Fabio', 'age' : 22, 'height' : 1.75, 'studies' : 'statistician', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['music', 'football', 'programming']}])
<pymongo.results.InsertManyResult at 0x173712bace0>

To see the name of the collections that we have in our data base my_database

database_1.list_collection_names()
['people']

Number of documents in a collection#

collection_1.count_documents({})
12

Remove a collection#

# database_1.drop_collection('people')
database_1.list_collection_names()
['people']

Filtering documents in a collection#

  • To see all the documents in a collection

list(collection_1.find({}))
[{'_id': ObjectId('65414306574841bdc85829aa'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('65414306574841bdc85829af'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']},
 {'_id': ObjectId('6574a37ccb773873c6907f89'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8c'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8e'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]
  • Documents in the collection with age > 30

list(collection_1.find({'age' : {'$gt' : 30}}))
[{'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8c'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']}]
  • First document with age > 30

collection_1.find_one({'age' : {'$gt' : 30}}) 
{'_id': ObjectId('65414306574841bdc85829ac'),
 'name': 'Lucia',
 'age': 32,
 'height': 1.63,
 'studies': 'psychologist',
 'country': 'Spain',
 'city': 'Madrid',
 'hobbies': ['techno', 'parties']}
  • Documents with age == 20

list(collection_1.find({'age' : 20}))
[{'_id': ObjectId('65414306574841bdc85829aa'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f89'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']}]
  • Documents in the collection with age < 30

list(collection_1.find({'age' : {'$lt' : 30}}))
[{'_id': ObjectId('65414306574841bdc85829aa'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('65414306574841bdc85829af'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']},
 {'_id': ObjectId('6574a37ccb773873c6907f89'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8e'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]
  • Documents in the collection with age <= 32

list(collection_1.find({'age' : {'$lte' : 32}}))
[{'_id': ObjectId('65414306574841bdc85829aa'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('65414306574841bdc85829af'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']},
 {'_id': ObjectId('6574a37ccb773873c6907f89'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8e'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]
  • Documents in the collection with age >= 32

list(collection_1.find({'age' : {'$gte' : 32}}))
[{'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8c'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']}]
  • Documents in the collection with age != 32

list(collection_1.find({'age' : {'$ne' : 32}}))
[{'_id': ObjectId('65414306574841bdc85829aa'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('65414306574841bdc85829af'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']},
 {'_id': ObjectId('6574a37ccb773873c6907f89'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8c'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8e'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]
  • Documents in the collection with age >= 32 and city == Madrid

list(collection_1.find({'age' : {'$gte' : 30}, 'city' : 'Madrid'}))
[{'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']}]
  • Documents in the collection with age > 40 or city == Toledo

list(collection_1.find({'$or': [{'age': {'$gte': 30}}, {'city': 'Toledo'}]}))
[{'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8c'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']}]
  • Documents in the collection with football in hobbies list

list(collection_1.find({'hobbies': 'football'}))
[{'_id': ObjectId('65414306574841bdc85829aa'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('65414306574841bdc85829af'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']},
 {'_id': ObjectId('6574a37ccb773873c6907f89'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f8e'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]
  • documents with (age >= 30 and city == Madrid) or (height <= 1.75 and age < 25)

query = {'$or': [{'$and': [{'age': {'$gte': 30}}, {'city': 'Madrid'}]}, {'$and': [{'height': {'$lt': 1.75}}, {'age': {'$lt': 25}}]}]}

list(collection_1.find(query))
[{'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']}]

Sorting#

query = {'$or': [{'$and': [{'age': {'$gte': 30}}, {'city': 'Madrid'}]}, {'$and': [{'height': {'$lt': 1.75}}, {'age': {'$lt': 25}}]}]}
list(collection_1.find(query).sort('height', 1))
[{'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']}]
list(collection_1.find(query).sort('height', -1))
[{'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']}]

Remove documents from a collection#

  • Delete the first documents with age == 20

collection_1.delete_one({'age' : 20})  
<pymongo.results.DeleteResult at 0x173713cb970>
list(collection_1.find({}))
[{'_id': ObjectId('65414306574841bdc85829ab'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829ae'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('65414306574841bdc85829af'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']},
 {'_id': ObjectId('6574a37ccb773873c6907f89'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f8a'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8c'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('6574a37ccb773873c6907f8d'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f8e'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]
  • Delete all the documents with age == 20

collection_1.delete_many({'age' : 20})  
<pymongo.results.DeleteResult at 0x173715223b0>
list(collection_1.find({}))
[{'_id': ObjectId('65414306574841bdc85829ac'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('65414306574841bdc85829ad'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('65414306574841bdc85829af'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']},
 {'_id': ObjectId('6574a37ccb773873c6907f8b'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f8c'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('6574a37ccb773873c6907f8e'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]
  • Delete all the documents

collection_1.delete_many({}) 
<pymongo.results.DeleteResult at 0x173713cbdc0>
list(collection_1.find({}))
[]
  • Restarting the original collection

collection_1.insert_many([{'name' : 'Pepe', 'age' : 20, 'height' : 1.85, 'studies' : 'civil engineer', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['music', 'football', 'reading']},
                          {'name' : 'Juan', 'age' : 20, 'height' : 1.7, 'studies' : 'industrial engineer', 'country' : 'Spain', 'city' : 'Toledo', 'hobbies' : ['youtube']},
                          {'name' : 'Lucia', 'age' : 32, 'height' : 1.63, 'studies' : 'psychologist', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['techno', 'parties']},
                          {'name' : 'Garazi', 'age' : 45, 'height' : 1.55, 'studies' : 'nurse', 'country' : 'Spain', 'city' : 'Pais Vasco', 'hobbies' : ['music', 'shopping', 'hospitals']},
                          {'name' : 'Javi', 'age' : 20, 'height' : 1.65, 'studies' : 'physician', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['music', 'basket', 'TV shows']},
                          {'name' : 'Fabio', 'age' : 22, 'height' : 1.75, 'studies' : 'statistician', 'country' : 'Spain', 'city' : 'Madrid', 'hobbies' : ['music', 'football', 'programming']}])
<pymongo.results.InsertManyResult at 0x17371537eb0>
list(collection_1.find({}))
[{'_id': ObjectId('6574a37ccb773873c6907f8f'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f90'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f91'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f92'),
  'name': 'Garazi',
  'age': 45,
  'height': 1.55,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('6574a37ccb773873c6907f93'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f94'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]

Update a document#

collection_1.update_one({'name' : 'Garazi'}, {'$set' : {'age' : 22, 'height' : 1.51}})
<pymongo.results.UpdateResult at 0x17371500940>
list(collection_1.find({}))
[{'_id': ObjectId('6574a37ccb773873c6907f8f'),
  'name': 'Pepe',
  'age': 20,
  'height': 1.85,
  'studies': 'civil engineer',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'reading']},
 {'_id': ObjectId('6574a37ccb773873c6907f90'),
  'name': 'Juan',
  'age': 20,
  'height': 1.7,
  'studies': 'industrial engineer',
  'country': 'Spain',
  'city': 'Toledo',
  'hobbies': ['youtube']},
 {'_id': ObjectId('6574a37ccb773873c6907f91'),
  'name': 'Lucia',
  'age': 32,
  'height': 1.63,
  'studies': 'psychologist',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['techno', 'parties']},
 {'_id': ObjectId('6574a37ccb773873c6907f92'),
  'name': 'Garazi',
  'age': 22,
  'height': 1.51,
  'studies': 'nurse',
  'country': 'Spain',
  'city': 'Pais Vasco',
  'hobbies': ['music', 'shopping', 'hospitals']},
 {'_id': ObjectId('6574a37ccb773873c6907f93'),
  'name': 'Javi',
  'age': 20,
  'height': 1.65,
  'studies': 'physician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'basket', 'TV shows']},
 {'_id': ObjectId('6574a37ccb773873c6907f94'),
  'name': 'Fabio',
  'age': 22,
  'height': 1.75,
  'studies': 'statistician',
  'country': 'Spain',
  'city': 'Madrid',
  'hobbies': ['music', 'football', 'programming']}]

Quering Radar Tickets DB#

client.drop_database('radar_tickets_db')
  • Create a new database with a collection to work in.

radar_tickets_db = client['radar_tickets_db'] 
radar_tickets_collection = radar_tickets_db['radar_tickets_collection']
  • We load the json file that contains the collection with which we will work.

with open(r'C:\Users\fscielzo\Documents\Large-Data\radar_tickets.json', 'r') as file:
    radar_tickets = json.load(file)
---------------------------------------------------------------------------
JSONDecodeError                           Traceback (most recent call last)
Cell In[46], line 2
      1 with open(r'C:\Users\fscielzo\Documents\Large-Data\radar_tickets.json', 'r') as file:
----> 2     radar_tickets = json.load(file)

File c:\Users\fscielzo\AppData\Local\Programs\Python\Python312\Lib\json\__init__.py:293, in load(fp, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    274 def load(fp, *, cls=None, object_hook=None, parse_float=None,
    275         parse_int=None, parse_constant=None, object_pairs_hook=None, **kw):
    276     """Deserialize ``fp`` (a ``.read()``-supporting file-like object containing
    277     a JSON document) to a Python object.
    278 
   (...)
    291     kwarg; otherwise ``JSONDecoder`` is used.
    292     """
--> 293     return loads(fp.read(),
    294         cls=cls, object_hook=object_hook,
    295         parse_float=parse_float, parse_int=parse_int,
    296         parse_constant=parse_constant, object_pairs_hook=object_pairs_hook, **kw)

File c:\Users\fscielzo\AppData\Local\Programs\Python\Python312\Lib\json\__init__.py:346, in loads(s, cls, object_hook, parse_float, parse_int, parse_constant, object_pairs_hook, **kw)
    341     s = s.decode(detect_encoding(s), 'surrogatepass')
    343 if (cls is None and object_hook is None and
    344         parse_int is None and parse_float is None and
    345         parse_constant is None and object_pairs_hook is None and not kw):
--> 346     return _default_decoder.decode(s)
    347 if cls is None:
    348     cls = JSONDecoder

File c:\Users\fscielzo\AppData\Local\Programs\Python\Python312\Lib\json\decoder.py:340, in JSONDecoder.decode(self, s, _w)
    338 end = _w(s, end).end()
    339 if end != len(s):
--> 340     raise JSONDecodeError("Extra data", s, end)
    341 return obj

JSONDecodeError: Extra data: line 58 column 1 (char 2329)

The problem that we are encountering is due to the lack of commas between the documents (whole dictionaries) contained in the file radar_tickets.json.

To overcome this problem we have to insert commas in that positions. As there are 500000 documents in the file, we cannot do it manually, so, we are going to automate the task.

  • First: save the radar_tickets.json file as a text file (.txt).

    This can be done easily from vs code left bar. Just make a copy of radar_tickets.json and rename it as radar_tickets.txt.

  • Second: open the text file radar_tickets.txt

txt_file = open(r"C:\Users\fscielzo\Documents\Large-Data\radar_tickets.txt")
txt_file = txt_file.read()
  • Third: split the text txt_file by the string ‘{ \n “_id”’ and store it in a new variable (splits).

splits = txt_file.split('{ \n  "_id"')
  • Fourth: reconstruct the documents, by adding ‘{ \n “_id”’ at the beginning of each element of splits and storing each element in the listradar_tickets_documents.

radar_tickets_documents = list()

for i in range(1, len(splits)) :

    radar_tickets_documents.append(json.loads('{ \n  "_id"' + splits[i]))
  • Fifth: save the list radar_tickets_documents as a json file.

with open(r"C:\Users\fscielzo\Documents\Large-Data\radar_tickets_new.json", 'w') as file:
    json.dump(radar_tickets_documents, file)
  • Sixth: open the new json file build through previous steps. This is the json with which we will work.

with open(r"C:\Users\fscielzo\Documents\Large-Data\radar_tickets_new.json", 'r') as file:
    radar_tickets = json.load(file)
  • Seventh: insert by insert_many method the new json file in the created collection (radar_tickets_collection) created at the beginning, which was empty until now.

radar_tickets_collection.insert_many(radar_tickets)
<pymongo.results.InsertManyResult at 0x1730ac3f0d0>

Counting the number of documents:

radar_tickets_collection.count_documents({})
50000
list(radar_tickets_collection.find({}))

Here we display only a part of the previous output since it is massive to be displayed completely.

...
{'_id': 'A1/-160/20130914132710000',
  'dump date': '02/10/18',
  'road': {'name': 'A1', 'speed limit': 120},
  'radar': {'mileage': 160, 'direction': 'descending', 'speed limit': 100},
  'vehicle': {'number plate': '9495AIA',
   'make': 'Fork',
   'model': 'Bipua',
   'power': 1400,
   'colour': 'gris',
   'chassis number': 'OOQ95654Q25754626',
   'registry date': 'Tuesday 10TH of July, 2007',
   'roadworthiness': '02/09/2010',
   'Driver': {'DNI': '48906593Z',
    'Name': 'Amarino',
    'Surname': 'Soto',
    'Sec_Surname': 'Baquerizo',
    'Address': 'Calle del Roble, N 178, 92901',
    'Town': 'Valvacas de los Ciervos',
    'Phone number': 555562373,
    'Email': 'Nasas@servcorreo.uctreseme.edu',
    'Birthdate': '16/09/1946',
    'driving license': {'type': 'C1', 'date': '04/06/2011'}},
   'Owner': {'DNI': '30173036A',
    'Name': 'Secundino',
    'Surname': 'Illescas',
    'Sec_Surname': 'Casanova',
    'Address': 'Travesia San Crispulo, N 152, 67758',
    'Town': 'Sotodebajo',
    'Email': 'Seci@servcorreo.uctreseme.edu',
    'Birthdate': '05/04/1962'}},
  'Record': {'rec_ID': 201325769096,
   'file': 'E:/data/vid/20130914/10069096.avi',
   'date': '14/09/2013',
   'time': '13:27:10.000',
   'speed': 91}},
 {'_id': 'M30/+11/20120410154721000',
  'dump date': '02/10/18',
  'road': {'name': 'M30', 'speed limit': 70},
  'radar': {'mileage': 11, 'direction': 'ascending', 'speed limit': 50},
  'vehicle': {'number plate': '9063AOI',
   'make': 'Rinaul',
   'model': 'Arroyo',
   'power': 1600,
   'colour': 'plateado',
   'chassis number': 'QQO35197I53881067',
   'registry date': 'Sunday 25TH of November, 2007',
   'roadworthiness': '28/08/2011',
   'Driver': {'DNI': '48272209Q',
    'Name': 'agata',
    'Surname': 'Marquina',
    'Sec_Surname': 'Granda',
    'Address': 'Plaza del Pozo, N 133, 90558',
    'Town': 'Villapiedras',
    'Phone number': 555551208,
    'Birthdate': '01/11/1952',
    'driving license': {'type': 'B', 'date': '20/04/2004'}},
   'Owner': {'DNI': '22649968M',
    'Name': 'Maria del Mar',
    'Surname': 'Diaz',
    'Sec_Surname': 'Zuniga',
    'Address': 'Avenida del Arroyo, N 153, 67758',
    'Town': 'Sotodebajo',
    'Phone number': 555571262,
    'Email': 'Coscha@servcorreo.uctreseme.edu',
    'Birthdate': '11/02/1956'}},
  'Record': {'rec_ID': 201210169095,
   'file': 'E:/data/vid/20120410/10069095.avi',
   'date': '10/04/2012',
   'time': '15:47:21.000',
   'speed': 69},
  'Speed ticket': {'Issue date': '27/04/2018',
   'Amount': 190,
   'Currency': 'euro',
   'Debtor': {'DNI': '22649968M',
    'Name': 'Maria del Mar',
    'Surname': 'Diaz',
    'Sec_Surname': 'Zuniga',
    'Address': 'Avenida del Arroyo, N 153, 67758',
    'Town': 'Sotodebajo',
    'Phone number': 555571262,
    'Email': 'Coscha@servcorreo.uctreseme.edu',
    'Birthdate': '11/02/1956',
    'Insolvency': 'No'},
   'Pay date': '11/05/2018',
   'Pay type': 'bank transfer',
   'State': 'fullfilled'}},
 {'_id': 'A2/+10/20120510155119000',
  'dump date': '02/10/18',
  'road': {'name': 'A2', 'speed limit': 120},
  'radar': {'mileage': 10, 'direction': 'ascending', 'speed limit': 100},
  'vehicle': {'number plate': '6111IAI',
   'make': 'Rinaul',
   'model': 'Fuente',
   'power': 1400,
   'colour': 'dorado',
   'chassis number': 'QIO78629I56366773',
   'registry date': 'Monday 25TH of October, 2004',
   'roadworthiness': [{'MOT date': '06/01/2009',
     'shortcomings': ['insufficient braking']},
    {'MOT date': '18/12/2008', 'shortcomings': ['worn tires', 'minor faults']},
    {'MOT date': '09/01/2009', 'shortcomings': ['broken headlight']},
    {'MOT date': '26/12/2011', 'shortcomings': ['defective bulb']}],
   'Driver': {'DNI': '70809340E',
    'Name': 'Maria de la Epifania',
    'Surname': 'Perez',
    'Sec_Surname': 'Rodriguez',
    'Address': 'Avenida de las Carretas, N 153, 87344',
    'Town': 'Villatrigos de Manteca',
    'Phone number': 555719522,
    'Birthdate': '24/01/1958',
    'driving license': {'type': 'B', 'date': '31/12/2007'}},
   'Owner': {'DNI': '70809340E',
    'Name': 'Maria de la Epifania',
    'Surname': 'Perez',
    'Sec_Surname': 'Rodriguez',
    'Address': 'Avenida de las Carretas, N 153, 87344',
    'Town': 'Villatrigos de Manteca',
    'Phone number': 555719522,
    'Birthdate': '24/01/1958'}},
  'Record': {'rec_ID': 201213169094,
   'file': 'E:/data/vid/20120510/10069094.avi',
   'date': '10/05/2012',
   'time': '15:51:19.000',
   'speed': 123},
  'Speed ticket': {'Issue date': '27/04/2018',
   'Amount': 230,
   'Currency': 'euro',
   'Debtor': {'DNI': '70809340E',
    'Name': 'Maria de la Epifania',
    'Surname': 'Perez',
    'Sec_Surname': 'Rodriguez',
    'Address': 'Avenida de las Carretas, N 153, 87344',
    'Town': 'Villatrigos de Manteca',
    'Phone number': 555719522,
    'Birthdate': '24/01/1958',
    'Insolvency': 'No'},
   'Pay date': '08/05/2018',
   'Pay type': 'bank transfer',
   'State': 'fullfilled'}},
 {'_id': 'A3/-206/20120925201344000',
  'dump date': '02/10/18',
  'road': {'name': 'A3', 'speed limit': 120},
  'radar': {'mileage': 206, 'direction': 'descending', 'speed limit': 100},
  'vehicle': {'number plate': '8686IUU',
   'make': 'Fork',
   'model': 'Tetrox',
   'power': 1400,
   'colour': 'dorado',
   'chassis number': 'OQO43241Q19559486',
   'registry date': 'Saturday 31ST of August, 2002',
   'roadworthiness': [{'MOT date': '08/11/2006',
     'shortcomings': ['broken headlight']},
    {'MOT date': '23/11/2006', 'shortcomings': ['spoiled engine']},
    {'MOT date': '12/11/2006', 'shortcomings': ['broken headlight']},
    {'MOT date': '20/11/2006', 'shortcomings': ['damaged license plate']},
    {'MOT date': '12/11/2010'}],
   'Driver': {'DNI': '95875344M',
    'Name': 'Andres',
    'Surname': 'Matos',
    'Sec_Surname': 'Osorio',
    'Address': 'Calle Curtidores, N 161, 61231',
    'Town': 'Sierra de los Trigos',
    'Phone number': 555622987,
    'Birthdate': '01/09/1949',
    'driving license': {'type': 'D1', 'date': '18/04/2006'}},
   'Owner': {'DNI': '95875344M',
    'Name': 'Andres',
    'Surname': 'Matos',
    'Sec_Surname': 'Osorio',
    'Address': 'Calle Curtidores, N 161, 61231',
    'Town': 'Sierra de los Trigos',
    'Phone number': 555622987,
    'Birthdate': '01/09/1949'}},
  'Record': {'rec_ID': 201226969093,
   'file': 'E:/data/vid/20120925/10069093.avi',
   'date': '25/09/2012',
   'time': '20:13:44.000',
   'speed': 98}},
 {'_id': 'M50/-18/20121208201828000',
  'dump date': '02/10/18',
  'road': {'name': 'M50', 'speed limit': 120},
  'radar': {'mileage': 18, 'direction': 'descending', 'speed limit': 100},
  'vehicle': {'number plate': '7696UOA',
   'make': 'Peuyo',
   'model': 'Real',
   'power': 1400,
   'colour': 'gris metalizado',
   'chassis number': 'OOI78032I91035606',
   'registry date': 'Friday 9TH of March, 2007',
   'roadworthiness': '01/08/2010',
   'Driver': {'DNI': '58793740J',
    'Name': 'Honorata',
    'Surname': 'Alcantara',
    'Sec_Surname': 'Lema',
    'Address': 'Travesia del Encinar, N 23, 12969',
    'Town': 'Villanieves de la Alameda',
    'Email': 'Alci@servcorreo.uctreseme.edu',
    'Birthdate': '21/11/1972',
    'driving license': {'type': 'C', 'date': '05/10/2001'}},
   'Owner': {'DNI': '46197994X',
    'Name': 'Carlos',
    'Surname': 'Avendano',
    'Sec_Surname': 'Delgado',
    'Address': 'Calle Carpinteros, N 199, 72686',
    'Town': 'Sototorres de Arriba',
    'Email': 'Delgui@servcorreo.uctreseme.edu',
    'Birthdate': '25/08/1957'}},
  'Record': {'rec_ID': 201234369092,
   'file': 'E:/data/vid/20121208/10069092.avi',
   'date': '08/12/2012',
   'time': '20:18:28.000',
   'speed': 112},
  'Speed ticket': {'Issue date': '27/04/2018',
   'Amount': 120,
   'Currency': 'euro',
   'Debtor': {'DNI': '46197994X',
    'Name': 'Carlos',
    'Surname': 'Avendano',
    'Sec_Surname': 'Delgado',
    'Address': 'Calle Carpinteros, N 199, 72686',
    'Town': 'Sototorres de Arriba',
    'Email': 'Delgui@servcorreo.uctreseme.edu',
    'Birthdate': '25/08/1957',
    'Insolvency': 'No'},
   'State': 'appealed'}}, 
   ...
radar_tickets_collection_list = list(radar_tickets_collection.find({}))

Taking a look to two documents of the collection, just for getting an idea of how them look like.

radar_tickets_collection_list[0]
{'_id': 'M30/-31/20091205175543130',
 'dump date': '02/10/18',
 'road': {'name': 'M30', 'speed limit': 70},
 'radar': {'mileage': 31, 'direction': 'descending', 'speed limit': 50},
 'vehicle': {'number plate': '7919AEO',
  'make': 'Rinaul',
  'model': 'Fuente',
  'power': 1400,
  'colour': 'marron',
  'chassis number': 'QOI84163Q14980235',
  'registry date': 'Thursday 29TH of May, 2008',
  'roadworthiness': '22/10/2011',
  'Driver': {'DNI': '21987229B',
   'Name': 'Ana Nuria',
   'Surname': 'Mory',
   'Sec_Surname': 'Ortiz',
   'Address': 'Paseo Lope de Vega, N 156, 69676',
   'Town': 'Sotosombras del Lago',
   'Phone number': 555153476,
   'Email': 'AMory@servcorreo.uctreseme.edu',
   'Birthdate': '16/02/1991',
   'driving license': {'type': 'B', 'date': '25/06/1999'}},
  'Owner': {'DNI': '21987229B',
   'Name': 'Ana Nuria',
   'Surname': 'Mory',
   'Sec_Surname': 'Ortiz',
   'Address': 'Paseo Lope de Vega, N 156, 69676',
   'Town': 'Sotosombras del Lago',
   'Phone number': 555153476,
   'Email': 'AMory@servcorreo.uctreseme.edu',
   'Birthdate': '16/02/1991'}},
 'Record': {'rec_ID': 200933969999,
  'file': 'E:/data/vid/20091205/10069999.avi',
  'date': '05/12/2009',
  'time': '17:55:43.130',
  'speed': 50}}
radar_tickets_collection_list[0].keys()
dict_keys(['_id', 'dump date', 'road', 'radar', 'vehicle', 'Record'])
radar_tickets_collection_list[150]
{'_id': 'A5/+274/20120409212407000',
 'dump date': '02/10/18',
 'road': {'name': 'A5', 'speed limit': 120},
 'radar': {'mileage': 274, 'direction': 'ascending', 'speed limit': 100},
 'vehicle': {'number plate': '8828OIA',
  'make': 'Rinaul',
  'model': 'Manante',
  'power': 1600,
  'colour': 'gris',
  'chassis number': 'IOI77370I57501874',
  'registry date': 'Monday 4TH of September, 2006',
  'roadworthiness': [{'MOT date': '13/09/2009',
    'shortcomings': ['damaged license plate']},
   {'MOT date': '16/09/2010', 'shortcomings': ['broken hand brake']}],
  'Driver': {'DNI': '63709185J',
   'Name': 'Maria Quintina',
   'Surname': 'Babilon',
   'Sec_Surname': 'Huapaya',
   'Address': 'Travesia del Silo, N 164, 29869',
   'Town': 'San Juan de las Golondrinas',
   'Phone number': 555582881,
   'Email': 'Babika@servcorreo.uctreseme.edu',
   'Birthdate': '09/11/1991',
   'driving license': {'type': 'B', 'date': '03/05/2008'}},
  'Owner': {'DNI': '46104267P',
   'Name': 'Felicidad',
   'Surname': 'Hidroso',
   'Sec_Surname': 'Torres',
   'Address': 'Avenida Estiradores, N 167, 65776',
   'Town': 'Serrania de las Setas',
   'Phone number': 555031873,
   'Birthdate': '23/11/1959'}},
 'Record': {'rec_ID': 201210069849,
  'file': 'E:/data/vid/20120409/10069849.avi',
  'date': '09/04/2012',
  'time': '21:24:07.000',
  'speed': 108},
 'Speed ticket': {'Issue date': '27/04/2018',
  'Amount': 80,
  'Currency': 'euro',
  'Debtor': {'DNI': '46104267P',
   'Name': 'Felicidad',
   'Surname': 'Hidroso',
   'Sec_Surname': 'Torres',
   'Address': 'Avenida Estiradores, N 167, 65776',
   'Town': 'Serrania de las Setas',
   'Phone number': 555031873,
   'Birthdate': '23/11/1959',
   'Insolvency': 'No'},
  'State': 'appealed'}}
radar_tickets_collection_list[150].keys()
dict_keys(['_id', 'dump date', 'road', 'radar', 'vehicle', 'Record', 'Speed ticket'])

Query 1#

Retrieve radar recordings (whole document) with a speed ticket greater than or equal to €750. Keep only documents where the driver is also the owner of the car (the same person).

# To check for documents where Driver.DNI and Owner.DNI have the same value, you should use the $expr operator. 

query = {"$and": [{"Speed ticket.Amount": {"$gte": 750}}, 
                  {"$expr": {"$eq": ["$vehicle.Driver.DNI", "$vehicle.Owner.DNI"]}}]}
list(radar_tickets_collection.find(query))
[]

As you can see, any document is returned, because there is no one matching the query.

In order to test the query we are going to include some new documents that satisfy it.

Testing the query

  • Same owner as driver and speed ticker > 750

radar_tickets_collection.insert_one({'_id': 'A5/+274/20120409212407001',
 'dump date': '02/10/18',
 'road': {'name': 'A5', 'speed limit': 120},
 'radar': {'mileage': 274, 'direction': 'ascending', 'speed limit': 100},
 'vehicle': {'number plate': '8828OIA',
  'make': 'Rinaul',
  'model': 'Manante',
  'power': 1600,
  'colour': 'gris',
  'chassis number': 'IOI77370I57501874',
  'registry date': 'Monday 4TH of September, 2006',
  'roadworthiness': [{'MOT date': '13/09/2009',
    'shortcomings': ['damaged license plate']},
   {'MOT date': '16/09/2010', 'shortcomings': ['broken hand brake']}],
  'Driver': {'DNI': '63709185J',
   'Name': 'Maria Quintina',
   'Surname': 'Babilon',
   'Sec_Surname': 'Huapaya',
   'Address': 'Travesia del Silo, N 164, 29869',
   'Town': 'San Juan de las Golondrinas',
   'Phone number': 555582881,
   'Email': 'Babika@servcorreo.uctreseme.edu',
   'Birthdate': '09/11/1991',
   'driving license': {'type': 'B', 'date': '03/05/2008'}},
  'Owner': {'DNI': '63709185J',
   'Name': 'Maria Quintina',
   'Surname': 'Babilon',
   'Sec_Surname': 'Huapaya',
   'Address': 'Travesia del Silo, N 164, 29869',
   'Town': 'San Juan de las Golondrinas',
   'Phone number': 555582881,
   'Birthdate': '09/11/1991'}},
 'Record': {'rec_ID': 201210069849,
  'file': 'E:/data/vid/20120409/10069849.avi',
  'date': '09/04/2012',
  'time': '21:24:07.000',
  'speed': 108},
 'Speed ticket': {'Issue date': '27/04/2018',
  'Amount': 850,
  'Currency': 'euro',
  'Debtor': {'DNI': '46104267P',
   'Name': 'Felicidad',
   'Surname': 'Hidroso',
   'Sec_Surname': 'Torres',
   'Address': 'Avenida Estiradores, N 167, 65776',
   'Town': 'Serrania de las Setas',
   'Phone number': 555031873,
   'Birthdate': '23/11/1959',
   'Insolvency': 'No'},
  'State': 'appealed'}})
<pymongo.results.InsertOneResult at 0x1737153a740>
  • Same owner as driver but speed ticker < 750

radar_tickets_collection.insert_one({'_id': 'A5/+274/20120409212407002',
 'dump date': '02/10/18',
 'road': {'name': 'A5', 'speed limit': 120},
 'radar': {'mileage': 274, 'direction': 'ascending', 'speed limit': 100},
 'vehicle': {'number plate': '8828OIA',
  'make': 'Rinaul',
  'model': 'Manante',
  'power': 1600,
  'colour': 'gris',
  'chassis number': 'IOI77370I57501874',
  'registry date': 'Monday 4TH of September, 2006',
  'roadworthiness': [{'MOT date': '13/09/2009',
    'shortcomings': ['damaged license plate']},
   {'MOT date': '16/09/2010', 'shortcomings': ['broken hand brake']}],
  'Driver': {'DNI': '63709185J',
   'Name': 'Maria Quintina',
   'Surname': 'Babilon',
   'Sec_Surname': 'Huapaya',
   'Address': 'Travesia del Silo, N 164, 29869',
   'Town': 'San Juan de las Golondrinas',
   'Phone number': 555582881,
   'Email': 'Babika@servcorreo.uctreseme.edu',
   'Birthdate': '09/11/1991',
   'driving license': {'type': 'B', 'date': '03/05/2008'}},
  'Owner': {'DNI': '41223185Z',
   'Name': 'Fabio',
   'Surname': 'Scielzo',
   'Sec_Surname': 'Ortiz',
   'Address': 'Paseo de la Castellana, 22',
   'Town': 'Madrid',
   'Phone number': 552382881,
   'Birthdate': '29/08/1998'}},
 'Record': {'rec_ID': 201210069849,
  'file': 'E:/data/vid/20120409/10069849.avi',
  'date': '09/04/2012',
  'time': '21:24:07.000',
  'speed': 108},
 'Speed ticket': {'Issue date': '27/04/2018',
  'Amount': 225,
  'Currency': 'euro',
  'Debtor': {'DNI': '46104267P',
   'Name': 'Felicidad',
   'Surname': 'Hidroso',
   'Sec_Surname': 'Torres',
   'Address': 'Avenida Estiradores, N 167, 65776',
   'Town': 'Serrania de las Setas',
   'Phone number': 555031873,
   'Birthdate': '23/11/1959',
   'Insolvency': 'No'},
  'State': 'appealed'}})
<pymongo.results.InsertOneResult at 0x1738a57aa10>
  • Same owner as driver but speed ticket == 750

radar_tickets_collection.insert_one({'_id': 'A5/+274/20120409212407005',
 'dump date': '02/10/18',
 'road': {'name': 'A5', 'speed limit': 120},
 'radar': {'mileage': 274, 'direction': 'ascending', 'speed limit': 100},
 'vehicle': {'number plate': '8828OIA',
  'make': 'Rinaul',
  'model': 'Manante',
  'power': 1600,
  'colour': 'gris',
  'chassis number': 'IOI77370I57501874',
  'registry date': 'Monday 4TH of September, 2006',
  'roadworthiness': [{'MOT date': '13/09/2009',
    'shortcomings': ['damaged license plate']},
   {'MOT date': '16/09/2010', 'shortcomings': ['broken hand brake']}],
  'Driver': {'DNI': '12309185X',
   'Name': 'Pedro',
   'Surname': 'Morata',
   'Sec_Surname': 'Lago',
   'Address': 'Travesia del Silo, N 164, 29869',
   'Town': 'San Juan de las Golondrinas',
   'Phone number': 552282381,
   'Email': 'pedroML@gmail.com',
   'Birthdate': '09/11/1991',
   'driving license': {'type': 'B', 'date': '04/08/2007'}},
  'Owner': {'DNI': '12309185X',
   'Name': 'Pedro',
   'Surname': 'Morata',
   'Sec_Surname': 'Lago',
   'Address': 'Travesia del Silo, N 164, 29869',
   'Town': 'San Juan de las Golondrinas',
   'Phone number': 552282381,
   'Birthdate': '09/11/1991'}},
 'Record': {'rec_ID': 201210069849,
  'file': 'E:/data/vid/20120409/10069849.avi',
  'date': '09/04/2012',
  'time': '21:24:07.000',
  'speed': 108},
 'Speed ticket': {'Issue date': '27/04/2018',
  'Amount': 750,
  'Currency': 'euro',
  'Debtor': {'DNI': '46104267P',
   'Name': 'Felicidad',
   'Surname': 'Hidroso',
   'Sec_Surname': 'Torres',
   'Address': 'Avenida Estiradores, N 167, 65776',
   'Town': 'Serrania de las Setas',
   'Phone number': 555031873,
   'Birthdate': '23/11/1959',
   'Insolvency': 'No'},
  'State': 'appealed'}})
<pymongo.results.InsertOneResult at 0x1730abefd30>
# To check for documents where Driver.DNI and Owner.DNI have the same value, you should use the $expr operator. 
query = {"$and": [{"Speed ticket.Amount": {"$gte": 750}}, 
                  {"$expr": {"$eq": ["$vehicle.Driver.DNI", "$vehicle.Owner.DNI"]}}]}
results_query = list(radar_tickets_collection.find(query))
results_query
[{'_id': 'A5/+274/20120409212407001',
  'dump date': '02/10/18',
  'road': {'name': 'A5', 'speed limit': 120},
  'radar': {'mileage': 274, 'direction': 'ascending', 'speed limit': 100},
  'vehicle': {'number plate': '8828OIA',
   'make': 'Rinaul',
   'model': 'Manante',
   'power': 1600,
   'colour': 'gris',
   'chassis number': 'IOI77370I57501874',
   'registry date': 'Monday 4TH of September, 2006',
   'roadworthiness': [{'MOT date': '13/09/2009',
     'shortcomings': ['damaged license plate']},
    {'MOT date': '16/09/2010', 'shortcomings': ['broken hand brake']}],
   'Driver': {'DNI': '63709185J',
    'Name': 'Maria Quintina',
    'Surname': 'Babilon',
    'Sec_Surname': 'Huapaya',
    'Address': 'Travesia del Silo, N 164, 29869',
    'Town': 'San Juan de las Golondrinas',
    'Phone number': 555582881,
    'Email': 'Babika@servcorreo.uctreseme.edu',
    'Birthdate': '09/11/1991',
    'driving license': {'type': 'B', 'date': '03/05/2008'}},
   'Owner': {'DNI': '63709185J',
    'Name': 'Maria Quintina',
    'Surname': 'Babilon',
    'Sec_Surname': 'Huapaya',
    'Address': 'Travesia del Silo, N 164, 29869',
    'Town': 'San Juan de las Golondrinas',
    'Phone number': 555582881,
    'Birthdate': '09/11/1991'}},
  'Record': {'rec_ID': 201210069849,
   'file': 'E:/data/vid/20120409/10069849.avi',
   'date': '09/04/2012',
   'time': '21:24:07.000',
   'speed': 108},
  'Speed ticket': {'Issue date': '27/04/2018',
   'Amount': 850,
   'Currency': 'euro',
   'Debtor': {'DNI': '46104267P',
    'Name': 'Felicidad',
    'Surname': 'Hidroso',
    'Sec_Surname': 'Torres',
    'Address': 'Avenida Estiradores, N 167, 65776',
    'Town': 'Serrania de las Setas',
    'Phone number': 555031873,
    'Birthdate': '23/11/1959',
    'Insolvency': 'No'},
   'State': 'appealed'}},
 {'_id': 'A5/+274/20120409212407005',
  'dump date': '02/10/18',
  'road': {'name': 'A5', 'speed limit': 120},
  'radar': {'mileage': 274, 'direction': 'ascending', 'speed limit': 100},
  'vehicle': {'number plate': '8828OIA',
   'make': 'Rinaul',
   'model': 'Manante',
   'power': 1600,
   'colour': 'gris',
   'chassis number': 'IOI77370I57501874',
   'registry date': 'Monday 4TH of September, 2006',
   'roadworthiness': [{'MOT date': '13/09/2009',
     'shortcomings': ['damaged license plate']},
    {'MOT date': '16/09/2010', 'shortcomings': ['broken hand brake']}],
   'Driver': {'DNI': '12309185X',
    'Name': 'Pedro',
    'Surname': 'Morata',
    'Sec_Surname': 'Lago',
    'Address': 'Travesia del Silo, N 164, 29869',
    'Town': 'San Juan de las Golondrinas',
    'Phone number': 552282381,
    'Email': 'pedroML@gmail.com',
    'Birthdate': '09/11/1991',
    'driving license': {'type': 'B', 'date': '04/08/2007'}},
   'Owner': {'DNI': '12309185X',
    'Name': 'Pedro',
    'Surname': 'Morata',
    'Sec_Surname': 'Lago',
    'Address': 'Travesia del Silo, N 164, 29869',
    'Town': 'San Juan de las Golondrinas',
    'Phone number': 552282381,
    'Birthdate': '09/11/1991'}},
  'Record': {'rec_ID': 201210069849,
   'file': 'E:/data/vid/20120409/10069849.avi',
   'date': '09/04/2012',
   'time': '21:24:07.000',
   'speed': 108},
  'Speed ticket': {'Issue date': '27/04/2018',
   'Amount': 750,
   'Currency': 'euro',
   'Debtor': {'DNI': '46104267P',
    'Name': 'Felicidad',
    'Surname': 'Hidroso',
    'Sec_Surname': 'Torres',
    'Address': 'Avenida Estiradores, N 167, 65776',
    'Town': 'Serrania de las Setas',
    'Phone number': 555031873,
    'Birthdate': '23/11/1959',
    'Insolvency': 'No'},
   'State': 'appealed'}}]
len(results_query) # number of documents matching the query
2

Including Specific Fields:

  • Set the field to 1 to include it in the returned documents.

  • By default, the _id field is included. To exclude it, set it to 0.

Excluding Specific Fields:

  • Set the field to 0 to exclude it from the returned documents.

# To check for documents where Driver.DNI and Owner.DNI have the same value, you should use the $expr operator. 
query = {"$and": [{"Speed ticket.Amount": {"$gte": 750}}, 
                  {"$expr": {"$eq": ["$vehicle.Driver.DNI", "$vehicle.Owner.DNI"]}}]}

projection = {"vehicle.Driver.DNI": 1, "vehicle.Owner.DNI": 1, "vehicle.Owner.Name": 1, 
              "Speed ticket.Amount": 1, "_id": 0}    

results_query = list(radar_tickets_collection.find(query, projection))
results_query
[{'vehicle': {'Driver': {'DNI': '63709185J'},
   'Owner': {'DNI': '63709185J', 'Name': 'Maria Quintina'}},
  'Speed ticket': {'Amount': 850}},
 {'vehicle': {'Driver': {'DNI': '12309185X'},
   'Owner': {'DNI': '12309185X', 'Name': 'Pedro'}},
  'Speed ticket': {'Amount': 750}}]

Query 2#

Number plate of cars that have had a clearance ticket despite of being reported with some problem with the brakes in some technical inspection (MOT). You should also provide the ticket date.

Matrícula de coches que han tenido una multa de liquidación a pesar de haber sido denunciados con algún problema en los frenos en alguna inspección técnica (ITV). También debe facilitar la fecha de la multa.

  • First approach

query = {"vehicle.roadworthiness.shortcomings": {"$elemMatch": {"$regex": "brak","$options": "i"}}}
# "i" makes the regex case-insensitive

projection = {'vehicle.number plate': 1, '_id': 0}

query_results = list(radar_tickets_collection.find(query, projection))
query_results

A sample of the query results provided running the previous cell. As before, we don’t display all the results since they are too much.

[...
 {'vehicle': {'number plate': '6829IOA'}},
 {'vehicle': {'number plate': '0473AEO'}},
 {'vehicle': {'number plate': '0566EAU'}},
 {'vehicle': {'number plate': '6511IUE'}},
 {'vehicle': {'number plate': '1329EUA'}},
 {'vehicle': {'number plate': '8545AOA'}},
 {'vehicle': {'number plate': '5408EII'}},
 {'vehicle': {'number plate': '2925UEO'}},
 {'vehicle': {'number plate': '2877AIA'}},
 {'vehicle': {'number plate': '1306OEE'}},
 {'vehicle': {'number plate': '6464AIA'}},
 {'vehicle': {'number plate': '8686IUU'}},
 {'vehicle': {'number plate': '7372AEU'}},
 {'vehicle': {'number plate': '2452UEO'}},
 ...]

The number of total results:

len(query_results)
19712
  • second approach

query = {"$and": [{"vehicle.roadworthiness.shortcomings": {"$elemMatch": {"$regex": "brak","$options": "i"}}},
                 {"Speed ticket": {"$exists": True}}]}
# "i" makes the regex case-insensitive

projection = {'vehicle.number plate': 1, 'Speed ticket.Issue date': 1, '_id': 0}

query_results = list(radar_tickets_collection.find(query, projection))
query_results

A sample of the query results provided running the previous cell. As before, we don’t display all the results since they are too much.

[...
{'vehicle': {'number plate': '5303EIO'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '9454IIE'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '6882IUI'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '5867AUU'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '2512UAA'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '5853OII'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '9519OEE'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '4334OUU'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
{'vehicle': {'number plate': '0122IIA'},
 'Speed ticket': {'Issue date': '27/04/2018'}},
...]

The number of total results:

len(query_results)
17447
  • Third approach

query = {"$and": [{"vehicle.roadworthiness.shortcomings": {"$elemMatch": {"$regex": "brak","$options": "i"}}},
                 {"Clearance ticket": {"$exists": True}}]}
# "i" makes the regex case-insensitive

projection = {'vehicle.number plate': 1, 'Clearance ticket.Issue date': 1, '_id': 0}

query_results = list(radar_tickets_collection.find(query, projection))
query_results

A sample of the query results provided running the previous cell.

[...
 {'vehicle': {'number plate': '4286AUA'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '5350OAE'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '5350OAE'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '4640EUA'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '1369OAU'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '1369OAU'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '5329UEU'},
  'Clearance ticket': {'Issue date': '27/04/2018'}}
  ]

The number of total results:

len(query_results)
188
  • Fourth approach

query = {"$and": [{"vehicle.roadworthiness.shortcomings": {"$elemMatch": {"$regex": "brak","$options": "i"}}},
                 {"Clearance ticket": {"$exists": True}}]}
# "i" makes the regex case-insensitive

projection = {'vehicle.number plate': 1, 'Clearance ticket.previous car.date': 1, '_id': 0}

query_results = list(radar_tickets_collection.find(query, projection))
query_results
[...
 {'vehicle': {'number plate': '4286AUA'},
  'Clearance ticket': {'previous car': {'date': '04/12/2016'}}},
 {'vehicle': {'number plate': '5350OAE'},
  'Clearance ticket': {'previous car': {'date': '25/06/2011'}}},
 {'vehicle': {'number plate': '5350OAE'},
  'Clearance ticket': {'previous car': {'date': '24/06/2017'}}},
 {'vehicle': {'number plate': '4640EUA'},
  'Clearance ticket': {'previous car': {'date': '21/09/2017'}}},
 {'vehicle': {'number plate': '1369OAU'},
  'Clearance ticket': {'previous car': {'date': '03/03/2011'}}},
 {'vehicle': {'number plate': '1369OAU'},
  'Clearance ticket': {'previous car': {'date': '08/07/2016'}}},
 {'vehicle': {'number plate': '5329UEU'},
  'Clearance ticket': {'previous car': {'date': '27/06/2017'}}}
  ]

Testing results: inserting documents that match the query but have an ‘Issue date’ different to ‘27/04/2018’

radar_tickets_collection.insert_one({'_id': 'A1/+218/20140126204716001',
 'dump date': '02/10/18',
 'road': {'name': 'A1', 'speed limit': 120},
 'radar': {'mileage': 218, 'direction': 'ascending', 'speed limit': 100},
 'vehicle': {'number plate': '4483AIO',
  'make': 'Merche',
  'model': 'Berlin',
  'power': 1400,
  'colour': 'negro metalizado',
  'chassis number': 'IOQ90947Q76497060',
  'registry date': 'Wednesday 5TH of October, 2005',
  'roadworthiness': [{'MOT date': '07/02/2010'},
   {'MOT date': '11/01/2010'},
   {'MOT date': '24/01/2012',
    'shortcomings': ['damaged license plate',
     'damaged chassis',
     'insufficient braking']}],
  'Driver': {'DNI': '26322160V',
   'Name': 'Luis',
   'Surname': 'Pedreros',
   'Sec_Surname': 'Sarrin',
   'Address': 'Calle de los Jazmines, N 39, 64503',
   'Town': 'Sotosetas',
   'Phone number': 555561625,
   'Email': 'Whooper@servcorreo.uctreseme.edu',
   'Birthdate': '18/03/1977',
   'driving license': {'type': 'B', 'date': '20/08/1989'}},
  'Owner': {'DNI': '15742171M',
   'Name': 'Maria de la Consolacion',
   'Surname': 'Garcia',
   'Sec_Surname': 'Pomar',
   'Address': 'Calle del Silo, N 28, 64122',
   'Town': 'Sotolobos',
   'Phone number': 555985301,
   'Birthdate': '30/01/1961'}},
 'Record': {'rec_ID': 201402669351,
  'file': 'E:/data/vid/20140126/10069351.avi',
  'date': '26/01/2014',
  'time': '20:47:16.000',
  'speed': 95},
 'Clearance ticket': {'previous car': {'number plate': '6560AII',
   'make': 'Fork',
   'model': 'Tetrox',
   'date': '26/01/2014',
   'time': '20:47:15.000'},
  'Issue date': '05/11/2020',
  'Amount': 188,
  'Currency': 'euro',
  'Debtor': {'DNI': '15742171M',
   'Name': 'Maria de la Consolacion',
   'Surname': 'Garcia',
   'Sec_Surname': 'Pomar',
   'Address': 'Calle del Silo, N 28, 64122',
   'Town': 'Sotolobos',
   'Phone number': 555985301,
   'Birthdate': '30/01/1961',
   'Insolvency': 'No'},
  'Pay date': '03/05/2018',
  'Pay type': 'bank transfer',
  'State': 'fullfilled'}})
<pymongo.results.InsertOneResult at 0x1738af6d570>
query = {"$and": [{"vehicle.roadworthiness.shortcomings": {"$elemMatch": {"$regex": "brak","$options": "i"}}},
                 {"Clearance ticket": {"$exists": True}}]}
# "i" makes the regex case-insensitive

projection = {'vehicle.number plate': 1, 'Clearance ticket.Issue date': 1, '_id': 0}

query_results = list(radar_tickets_collection.find(query, projection))
query_results

A sample of the query results provided running the previous cell.

[...
 {'vehicle': {'number plate': '1131AOO'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '4286AUA'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '5350OAE'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '5350OAE'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '4640EUA'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '1369OAU'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '1369OAU'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '5329UEU'},
  'Clearance ticket': {'Issue date': '27/04/2018'}},
 {'vehicle': {'number plate': '4483AIO'},
  'Clearance ticket': {'Issue date': '05/11/2020'}}
  ]

Query 3#

For each month of the year, group the drivers who were born in that month, and calculate their average speed and their total amount of tickets. Naturally, the output has 12 documents (sort from January to December), unless some month has no driver recorded…

Tip: nobody says you have to display the name of the months (you can use a number…)

  • First approach

query = {"vehicle.Driver.Birthdate": {"$regex": "/11/", "$options": "i"}}

projection = {'vehicle.Driver.DNI': 1, 'vehicle.Driver.Birthdate': 1, 'Record.speed': 1, 'Speed ticket.amount': 1, '_id': 0}

query_results = list(radar_tickets_collection.find(query, projection))

query_results

  • Second approach

pipeline = [{"$project": {"month_of_birth": {"$substr": ["$vehicle.Driver.Birthdate", 3, 2]}, # Extract month from Birthdate 
                          "speed": "$Record.speed", # Extract speed from Record
                          "ticket_amount": "$Speed ticket.Amount" # Extract ticket amount (in €)
                          } 
            }, 
            {"$group": {"_id": "$month_of_birth",  # Group by month
                        "average_speed": {"$avg": "$speed"},  # Calculate average speed
                        "total_amount_tickets": {"$sum": "$ticket_amount"},  # Calculate total tickets amount 
                        }
            },
            {"$sort": {"_id": 1}} # Sort by month in ascending order
            ]  

query_results = list(radar_tickets_collection.aggregate(pipeline))
query_results
[{'_id': '01',
  'average_speed': 104.76378169790519,
  'total_amount_tickets': 538990},
 {'_id': '02',
  'average_speed': 104.62826797385621,
  'total_amount_tickets': 517150},
 {'_id': '03',
  'average_speed': 104.79309506442986,
  'total_amount_tickets': 607850},
 {'_id': '04',
  'average_speed': 104.36586257309942,
  'total_amount_tickets': 817810},
 {'_id': '05',
  'average_speed': 103.52604075944957,
  'total_amount_tickets': 850230},
 {'_id': '06',
  'average_speed': 104.64609236234459,
  'total_amount_tickets': 651570},
 {'_id': '07',
  'average_speed': 103.49527576356844,
  'total_amount_tickets': 664580},
 {'_id': '08',
  'average_speed': 104.94220509780676,
  'total_amount_tickets': 511870},
 {'_id': '09',
  'average_speed': 105.54202116621705,
  'total_amount_tickets': 728210},
 {'_id': '10',
  'average_speed': 103.83906931652933,
  'total_amount_tickets': 285580},
 {'_id': '11',
  'average_speed': 105.37641288121135,
  'total_amount_tickets': 698875},
 {'_id': '12',
  'average_speed': 104.39550029603316,
  'total_amount_tickets': 483750}]
len(query_results)
12

Some explanations:

  • {”\(substr": ["\)vehicle.Driver.Birthdate”, 3, 2]}

    • “$substr”: This is the MongoDB operator used to get a substring of a string field.

    • “$vehicle.Driver.Birthdate”: This is the field from which the substring will be extracted. Assuming that Birthdate is stored in the format “dd/mm/yyyy”, this operation is targeting the month portion.

    • 3: This is the starting index of the substring. Indices start at 0, so an index of 3 points to the first character of the month (e.g., in “16/02/1991”, it points to the first “0” in “02”).

    • 2: This is the length of the substring. Since a month in “dd/mm/yyyy” format is represented by two characters (like “02” for February), the length is 2.

Query 4#

For each road, percentages of recordings with a ticket of each type (regarding total observations in that road).

Tip: each recording can have several tickets, but at most one of each type

Para cada carretera, porcentajes de registros con una multa de cada tipo (respecto al total de observaciones en esa carretera).

pipeline = [
    {
        "$project": {
            "road_name": "$road.name",
            "clearance_ticket_exists": {"$cond": [{"$ifNull": ["$Clearance ticket", False]}, True, False]},
            "speed_ticket_exists": {"$cond": [{"$ifNull": ["$Speed ticket", False]}, True, False]},
            "stretch_ticket_exists": {"$cond": [{"$ifNull": ["$Stretch ticket", False]}, True, False]}
        }
    },
    {
        "$group": {
            "_id": "$road_name",
            "total_records": {"$sum": 1},
            "clearance_ticket_count": {"$sum": {"$cond": ["$clearance_ticket_exists", 1, 0]}},
            "speed_ticket_count" : {"$sum": {"$cond": ["$speed_ticket_exists", 1, 0]}},
            "stretch_ticket_count" : {"$sum": {"$cond": ["$stretch_ticket_exists", 1, 0]}}
        }
    },
    {
        "$addFields": {
            "clearance_tickets_prop": {"$round": [{"$divide": ["$clearance_ticket_count", "$total_records"]}, 4]},
            "speed_tickets_prop": {"$round": [{"$divide": ["$speed_ticket_count", "$total_records"]}, 4]},
            "stretch_tickets_prop": {"$round": [{"$divide": ["$stretch_ticket_count", "$total_records"]}, 4]},
        }
    },
    {
    "$addFields": {
        "no_ticket_prop": {"$round": [{"$subtract": [1, {"$add": ["$clearance_tickets_prop", 
                                                                  "$speed_tickets_prop", 
                                                                  "$stretch_tickets_prop"]}]}, 4]}
        }
    },
    {
        "$project": {"road_name": "$_id",  
                     "clearance_tickets_prop": 1,
                     "speed_tickets_prop": 1,
                     "stretch_tickets_prop": 1, 
                     "no_ticket_prop": 1}
    }
]

query_results = list(radar_tickets_collection.aggregate(pipeline))
query_results
[{'_id': 'A6',
  'clearance_tickets_prop': 0.0101,
  'speed_tickets_prop': 0.866,
  'stretch_tickets_prop': 0.0101,
  'no_ticket_prop': 0.1138,
  'road_name': 'A6'},
 {'_id': 'M40',
  'clearance_tickets_prop': 0.0108,
  'speed_tickets_prop': 0.8877,
  'stretch_tickets_prop': 0.0045,
  'no_ticket_prop': 0.097,
  'road_name': 'M40'},
 {'_id': 'A2',
  'clearance_tickets_prop': 0.0097,
  'speed_tickets_prop': 0.8914,
  'stretch_tickets_prop': 0.0131,
  'no_ticket_prop': 0.0858,
  'road_name': 'A2'},
 {'_id': 'A4',
  'clearance_tickets_prop': 0.0081,
  'speed_tickets_prop': 0.9094,
  'stretch_tickets_prop': 0.0097,
  'no_ticket_prop': 0.0728,
  'road_name': 'A4'},
 {'_id': 'M30',
  'clearance_tickets_prop': 0.0098,
  'speed_tickets_prop': 0.9018,
  'stretch_tickets_prop': 0.0082,
  'no_ticket_prop': 0.0802,
  'road_name': 'M30'},
 {'_id': 'M50',
  'clearance_tickets_prop': 0.0064,
  'speed_tickets_prop': 0.8835,
  'stretch_tickets_prop': 0.0074,
  'no_ticket_prop': 0.1027,
  'road_name': 'M50'},
 {'_id': 'A3',
  'clearance_tickets_prop': 0.0094,
  'speed_tickets_prop': 0.8809,
  'stretch_tickets_prop': 0.0138,
  'no_ticket_prop': 0.0959,
  'road_name': 'A3'},
 {'_id': 'A5',
  'clearance_tickets_prop': 0.0069,
  'speed_tickets_prop': 0.8608,
  'stretch_tickets_prop': 0.0097,
  'no_ticket_prop': 0.1226,
  'road_name': 'A5'},
 {'_id': 'A1',
  'clearance_tickets_prop': 0.0114,
  'speed_tickets_prop': 0.8739,
  'stretch_tickets_prop': 0.0077,
  'no_ticket_prop': 0.107,
  'road_name': 'A1'},
 {'_id': 'M45',
  'clearance_tickets_prop': 0.009,
  'speed_tickets_prop': 0.8797,
  'stretch_tickets_prop': 0.003,
  'no_ticket_prop': 0.1083,
  'road_name': 'M45'}]

Query 5#

For each type of a problem (in a MOT), specify the most frequent make and model (be careful: the same car could have several recordings…).

Tip: make and model is an identifier (can be a single “column”)

  • First approach

pipeline = [
    # Unwind the roadworthiness array
    {"$unwind": "$vehicle.roadworthiness"},
    
    # Unwind the shortcomings array
    {"$unwind": "$vehicle.roadworthiness.shortcomings"}
    
]

results_query = list(radar_tickets_collection.aggregate(pipeline))
results_query

A sample of the query results provided running the previous cell.

[...
{'_id': 'A2/+10/20101127190005820',
  'dump date': '02/10/18',
  'road': {'name': 'A2', 'speed limit': 120},
  'radar': {'mileage': 10, 'direction': 'ascending', 'speed limit': 100},
  'vehicle': {'number plate': '6525AEI',
   'make': 'Zitron',
   'model': 'Lemmon',
   'power': 1400,
   'colour': 'azul',
   'chassis number': 'OIO64678O93280477',
   'registry date': 'Tuesday 30TH of March, 2004',
   'roadworthiness': {'MOT date': '06/11/2011',
    'shortcomings': 'broken hand brake'},
   'Driver': {'DNI': '87980168Q',
    'Name': 'Jesus Diego',
    'Surname': 'Bullon',
    'Sec_Surname': 'Wong',
    'Address': 'Calle San Millan, N 27, 35739',
    'Town': 'Valseca del Secano',
    'Phone number': 555065167,
    'Email': 'Kiko@servcorreo.uctreseme.edu',
    'Birthdate': '27/06/1955',
    'driving license': {'type': 'D', 'date': '19/06/2004'}},
   'Owner': {'DNI': '87980168Q',
    'Name': 'Jesus Diego',
    'Surname': 'Bullon',
    'Sec_Surname': 'Wong',
    'Address': 'Calle San Millan, N 27, 35739',
    'Town': 'Valseca del Secano',
    'Phone number': 555065167,
    'Email': 'Kiko@servcorreo.uctreseme.edu',
    'Birthdate': '27/06/1955'}},
  'Record': {'rec_ID': 201033169742,
   'file': 'E:/data/vid/20101127/10069742.avi',
   'date': '27/11/2010',
   'time': '19:00:05.820',
   'speed': 116},
  'Speed ticket': {'Issue date': '27/04/2018',
   'Amount': 160,
   'Currency': 'euro',
   'Debtor': {'DNI': '87980168Q',
    'Name': 'Jesus Diego',
    'Surname': 'Bullon',
    'Sec_Surname': 'Wong',
    'Address': 'Calle San Millan, N 27, 35739',
    'Town': 'Valseca del Secano',
    'Phone number': 555065167,
    'Email': 'Kiko@servcorreo.uctreseme.edu',
    'Birthdate': '27/06/1955',
    'Insolvency': 'No'},
   'Pay date': '09/05/2018',
   'Pay type': 'cash',
   'State': 'fullfilled'}},
 {'_id': 'A4/+54/20110604133704990',
  'dump date': '02/10/18',
  'road': {'name': 'A4', 'speed limit': 120},
  'radar': {'mileage': 54, 'direction': 'ascending', 'speed limit': 100},
  'vehicle': {'number plate': '2347OUI',
   'make': 'Peuyo',
   'model': 'Natural',
   'power': 1400,
   'colour': 'rojo metalizado',
   'chassis number': 'IIQ34933Q76160605',
   'registry date': 'Sunday 10TH of July, 2005',
   'roadworthiness': {'MOT date': '17/01/2010',
    'shortcomings': 'windshield in poor condition'},
   'Driver': {'DNI': '31414734T',
    'Name': 'Maria Clotilde',
    'Surname': 'Mendoza',
    'Sec_Surname': 'Gomez',
    'Address': 'Avenida de los alamos, N 68, 11006',
    'Town': 'Sotolemures de la Noceda',
    'Phone number': 555654658,
    'Email': 'Cloty@servcorreo.uctreseme.edu',
    'Birthdate': '25/05/1951',
    'driving license': {'type': 'C', 'date': '01/10/2002'}},
   'Owner': {'DNI': '32609826N',
    'Name': 'Cayetano',
    'Surname': 'Almeida',
    'Sec_Surname': 'Kuyeng',
    'Address': 'Travesia Estiradores, N 96, 78109',
    'Town': 'Villachimeneas de los Viajeros',
    'Phone number': 555256042,
    'Birthdate': '29/10/1968'}},
  'Record': {'rec_ID': 201115569741,
   'file': 'E:/data/vid/20110604/10069741.avi',
   'date': '04/06/2011',
   'time': '13:37:04.990',
   'speed': 118},
  'Speed ticket': {'Issue date': '27/04/2018',
   'Amount': 180,
   'Currency': 'euro',
   'Debtor': {'DNI': '32609826N',
    'Name': 'Cayetano',
    'Surname': 'Almeida',
    'Sec_Surname': 'Kuyeng',
    'Address': 'Travesia Estiradores, N 96, 78109',
    'Town': 'Villachimeneas de los Viajeros',
    'Phone number': 555256042,
    'Birthdate': '29/10/1968',
    'Insolvency': 'No'},
   'Pay date': '07/05/2018',
   'Pay type': 'cash',
   'State': 'fullfilled'}},
 ...]

Understanding $unwind operation:

[
  {
  "_id": 1,
  "items": ["A", "B", "C"]
  },
  {
   "_id": 2,
  "items": ["H", "J", "K"]   
  }
]

###############################

{ "$unwind": "$items" }

###############################

{ "_id": 1, "items": "A" }
{ "_id": 1, "items": "B" }
{ "_id": 1, "items": "C" }
{ "_id": 2, "items": "H" }
{ "_id": 2, "items": "J" }
{ "_id": 2, "items": "K" }
{'_id': 2, 'items': 'K'}
  • Second approach

pipeline = [
    # Unwind the roadworthiness array
    {"$unwind": "$vehicle.roadworthiness"},
    
    # Unwind the shortcomings array
    {"$unwind": "$vehicle.roadworthiness.shortcomings"},
    
    # Group by shortcomings and vehicle make-model to count occurrences
    {
        "$group": {
            "_id": {
                "shortcoming": "$vehicle.roadworthiness.shortcomings",
                "make": "$vehicle.make",
                "model": "$vehicle.model"
            },
            "count": {"$sum": 1}
        }
    },
    
    # Sort by shortcoming and descending count
    {
        "$sort": {
            "_id.shortcoming": 1,
            "count": -1
        }
    }
]

results_query = list(radar_tickets_collection.aggregate(pipeline))
results_query

A sample of the query results provided running the previous cell.

[...,
 {'_id': {'shortcoming': 'worn tires', 'make': 'Peuyo', 'model': 'Entero'},
  'count': 283},
 {'_id': {'shortcoming': 'worn tires', 'make': 'Rinaul', 'model': 'Arroyo'},
  'count': 281},
 {'_id': {'shortcoming': 'worn tires', 'make': 'Rinaul', 'model': 'Manante'},
  'count': 219},
 {'_id': {'shortcoming': 'worn tires', 'make': 'Escola', 'model': 'Tercius'},
  'count': 209},
 {'_id': {'shortcoming': 'worn tires', 'make': 'Bemev', 'model': 'K-brio'},
  'count': 179},
 {'_id': {'shortcoming': 'worn tires', 'make': 'Escola', 'model': 'Cuartus'},
  'count': 136},
 {'_id': {'shortcoming': 'worn tires', 'make': 'Fork', 'model': 'Bipua'},
  'count': 127},
 {'_id': {'shortcoming': 'worn tires', 'make': 'Fork', 'model': 'Tridente'},
  'count': 65}
]
  • Third approach

pipeline = [
    # Unwind the roadworthiness array
    {"$unwind": "$vehicle.roadworthiness"},
    
    # Unwind the shortcomings array
    {"$unwind": "$vehicle.roadworthiness.shortcomings"},
    
    # Group by shortcomings and vehicle make-model to count occurrences
    {
        "$group": {
            "_id": {
                "shortcoming": "$vehicle.roadworthiness.shortcomings",
                "make": "$vehicle.make",
                "model": "$vehicle.model"
            },
            "count": {"$sum": 1}
        }
    },
    
    # Sort by shortcoming and descending count
    {
        "$sort": {
            "_id.shortcoming": 1,
            "count": -1
        }
    },
    
    # Group by shortcoming to get the most frequent make and model
    {
        "$group": {
            "_id": "$_id.shortcoming",
            "most_frequent_make": {"$first": "$_id.make"},
            "most_frequent_model": {"$first": "$_id.model"},
            "count": {"$first": "$count"}
        }
    },
    
    # Optionally, sort by the shortcoming for better readability
    {
        "$sort": {"_id": 1}
    }
]

results_query = list(radar_tickets_collection.aggregate(pipeline))
results_query
[{'_id': 'accessories not allowed',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 805},
 {'_id': 'broken hand brake',
  'most_frequent_make': 'Rinaul',
  'most_frequent_model': 'Charco',
  'count': 779},
 {'_id': 'broken headlight',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 785},
 {'_id': 'cirulillo  of distribuitor failure',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 792},
 {'_id': 'damaged chassis',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 785},
 {'_id': 'damaged license plate',
  'most_frequent_make': 'Rinaul',
  'most_frequent_model': 'Charco',
  'count': 783},
 {'_id': 'defective bulb',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 800},
 {'_id': 'excess smoke',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 777},
 {'_id': 'exhaust pipe',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 775},
 {'_id': 'improperly regulated lights',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 784},
 {'_id': 'insufficient braking',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 814},
 {'_id': 'minor faults',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 788},
 {'_id': 'spoiled engine',
  'most_frequent_make': 'Rinaul',
  'most_frequent_model': 'Charco',
  'count': 795},
 {'_id': 'windshield in poor condition',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 781},
 {'_id': 'worn tires',
  'most_frequent_make': 'Bemev',
  'most_frequent_model': 'Berlin',
  'count': 781}]

Query 6#

Build two lists (in the same query): the “Fast” drivers (those in the top quartile of average speed, while in the bottom quartile of summation of tickets’ amount); and the “Furious” (just the opposite, top ticketing and bottom racing).

# Step 1: Calculate top quartile of average speed

pipeline_speed = [
    {
        "$group": {
            "_id": "$vehicle.Driver.DNI",
            "avg_speed": {"$avg": "$Record.speed"}
        }
    },
    {
        "$project": {
            "avg_speed": {"$round": ["$avg_speed", 4]}
        }
    }
]

results_speed = list(radar_tickets_collection.aggregate(pipeline_speed))
avg_speed_values = [x['avg_speed'] for x in results_speed]
avg_speed_quantile_75 = np.quantile(avg_speed_values, 0.75)
avg_speed_quantile_25 = np.quantile(avg_speed_values, 0.25)


# Step 2: Calculate bottom quartile of summation of tickets

pipeline_tickets = [
    {
        "$group": {
            "_id": "$vehicle.Driver.DNI",
            "avg_ticket_amount": {"$avg": "$Speed ticket.amount"}
        }
    },
    {
        "$project": {
            "avg_ticket_amount": {"$round": ["$avg_ticket_amount", 4]}
        }
    }
]

results_ticket_amount = list(radar_tickets_collection.aggregate(pipeline_tickets))
avg_ticket_amount_values = [x['avg_speed'] for x in results_speed]
avg_ticket_amount_quantile_75 = np.quantile(avg_ticket_amount_values, 0.75)
avg_ticket_amount_quantile_25 = np.quantile(avg_ticket_amount_values, 0.25)


# Step 3: Computing the fast drivers

fast_drivers_query = {"$and": [{"Record.speed": {"$gte": avg_speed_quantile_75}},
                               {"Speed ticket.Amount": {"$lte": avg_ticket_amount_quantile_25}}]}

fast_drivers_projection = {"vehicle.Driver.DNI": 1, "Record.speed": 1, "Speed ticket.Amount": 1, "_id": 0}   

fast_drivers = list(radar_tickets_collection.find(fast_drivers_query, fast_drivers_projection).sort("Record.speed", -1))


# Step 4: Computing the furious drivers

furious_drivers_query = {"$and": [{"Record.speed": {"$lte": avg_speed_quantile_25}},
                               {"Speed ticket.Amount": {"$gte": avg_ticket_amount_quantile_75}}]}

furious_drivers_projection = {"vehicle.Driver.DNI": 1, "Record.speed": 1, "Speed ticket.Amount": 1, "_id": 0}   

furious_drivers = list(radar_tickets_collection.find(furious_drivers_query, furious_drivers_projection).sort("Speed ticket.Amount", -1))
fast_drivers

A sample of the query results provided running the previous cell.

[...,
 {'vehicle': {'Driver': {'DNI': '76150280V'}},
  'Record': {'speed': 110},
  'Speed ticket': {'Amount': 100}},
 {'vehicle': {'Driver': {'DNI': '80498155H'}},
  'Record': {'speed': 110},
  'Speed ticket': {'Amount': 100}},
 {'vehicle': {'Driver': {'DNI': '88900195C'}},
  'Record': {'speed': 110},
  'Speed ticket': {'Amount': 100}},
 {'vehicle': {'Driver': {'DNI': '58444159D'}},
  'Record': {'speed': 110},
  'Speed ticket': {'Amount': 100}},
 {'vehicle': {'Driver': {'DNI': '55533072D'}},
  'Record': {'speed': 110},
  'Speed ticket': {'Amount': 100}},
 ...]
fast_drivers[-5:-1]
[{'vehicle': {'Driver': {'DNI': '31414734T'}},
  'Record': {'speed': 107},
  'Speed ticket': {'Amount': 70}},
 {'vehicle': {'Driver': {'DNI': '23939454L'}},
  'Record': {'speed': 107},
  'Speed ticket': {'Amount': 70}},
 {'vehicle': {'Driver': {'DNI': '65871451A'}},
  'Record': {'speed': 107},
  'Speed ticket': {'Amount': 70}},
 {'vehicle': {'Driver': {'DNI': '74026572Z'}},
  'Record': {'speed': 107},
  'Speed ticket': {'Amount': 70}}]
len(fast_drivers)
3301
furious_drivers

A sample of the query results provided running the previous cell.

[...,
 {'vehicle': {'Driver': {'DNI': '78455829T'}},
  'Record': {'speed': 100},
  'Speed ticket': {'Amount': 200}},
 {'vehicle': {'Driver': {'DNI': '42038947F'}},
  'Record': {'speed': 100},
  'Speed ticket': {'Amount': 200}},
 {'vehicle': {'Driver': {'DNI': '25877835Y'}},
  'Record': {'speed': 100},
  'Speed ticket': {'Amount': 200}},
 {'vehicle': {'Driver': {'DNI': '45191051F'}},
  'Record': {'speed': 70},
  'Speed ticket': {'Amount': 200}},
 {'vehicle': {'Driver': {'DNI': '58793740J'}},
  'Record': {'speed': 100},
  'Speed ticket': {'Amount': 200}},
 ...]
furious_drivers[-5:-1]
[{'vehicle': {'Driver': {'DNI': '19502956Z'}},
  'Record': {'speed': 61},
  'Speed ticket': {'Amount': 110}},
 {'vehicle': {'Driver': {'DNI': '20304333W'}},
  'Record': {'speed': 91},
  'Speed ticket': {'Amount': 110}},
 {'vehicle': {'Driver': {'DNI': '47591599E'}},
  'Record': {'speed': 91},
  'Speed ticket': {'Amount': 110}},
 {'vehicle': {'Driver': {'DNI': '17817924P'}},
  'Record': {'speed': 61},
  'Speed ticket': {'Amount': 110}}]
len(furious_drivers)
10047