Comparison with SQL¶
Since many potential pandas users have some familiarity with SQL, this page is meant to provide some examples of how various SQL operations would be performed using pandas.
If you’re new to pandas, you might want to first read through 10 Minutes to pandas to familiarize yourself with the library.
As is customary, we import pandas and numpy as follows:
In [1]: import pandas as pd
In [2]: import numpy as np
Most of the examples will utilize the tips dataset found within pandas tests. We’ll read
the data into a DataFrame called tips and assume we have a database table of the same name and
structure.
In [3]: url = 'https://raw.github.com/pandas-dev/pandas/master/pandas/tests/data/tips.csv'
In [4]: tips = pd.read_csv(url)
---------------------------------------------------------------------------
gaierror Traceback (most recent call last)
/usr/lib64/python3.4/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1182 try:
-> 1183 h.request(req.get_method(), req.selector, req.data, headers)
1184 except OSError as err: # timeout error
/usr/lib64/python3.4/http/client.py in request(self, method, url, body, headers)
1136 """Send a complete request to the server."""
-> 1137 self._send_request(method, url, body, headers)
1138
/usr/lib64/python3.4/http/client.py in _send_request(self, method, url, body, headers)
1181 body = body.encode('iso-8859-1')
-> 1182 self.endheaders(body)
1183
/usr/lib64/python3.4/http/client.py in endheaders(self, message_body)
1132 raise CannotSendHeader()
-> 1133 self._send_output(message_body)
1134
/usr/lib64/python3.4/http/client.py in _send_output(self, message_body)
962 message_body = None
--> 963 self.send(msg)
964 if message_body is not None:
/usr/lib64/python3.4/http/client.py in send(self, data)
897 if self.auto_open:
--> 898 self.connect()
899 else:
/usr/lib64/python3.4/http/client.py in connect(self)
1278
-> 1279 super().connect()
1280
/usr/lib64/python3.4/http/client.py in connect(self)
870 self.sock = self._create_connection((self.host,self.port),
--> 871 self.timeout, self.source_address)
872
/usr/lib64/python3.4/socket.py in create_connection(address, timeout, source_address)
497 err = None
--> 498 for res in getaddrinfo(host, port, 0, SOCK_STREAM):
499 af, socktype, proto, canonname, sa = res
/usr/lib64/python3.4/socket.py in getaddrinfo(host, port, family, type, proto, flags)
536 addrlist = []
--> 537 for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
538 af, socktype, proto, canonname, sa = res
gaierror: [Errno -3] Temporary failure in name resolution
During handling of the above exception, another exception occurred:
URLError Traceback (most recent call last)
<ipython-input-4-8ab2297b7141> in <module>()
----> 1 tips = pd.read_csv(url)
~/rpmbuild/BUILD/pandas-0.22.0/pandas/io/parsers.py in parser_f(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, squeeze, prefix, mangle_dupe_cols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, dayfirst, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, escapechar, comment, encoding, dialect, tupleize_cols, error_bad_lines, warn_bad_lines, skipfooter, skip_footer, doublequote, delim_whitespace, as_recarray, compact_ints, use_unsigned, low_memory, buffer_lines, memory_map, float_precision)
707 skip_blank_lines=skip_blank_lines)
708
--> 709 return _read(filepath_or_buffer, kwds)
710
711 parser_f.__name__ = name
~/rpmbuild/BUILD/pandas-0.22.0/pandas/io/parsers.py in _read(filepath_or_buffer, kwds)
431 compression = _infer_compression(filepath_or_buffer, compression)
432 filepath_or_buffer, _, compression = get_filepath_or_buffer(
--> 433 filepath_or_buffer, encoding, compression)
434 kwds['compression'] = compression
435
~/rpmbuild/BUILD/pandas-0.22.0/pandas/io/common.py in get_filepath_or_buffer(filepath_or_buffer, encoding, compression)
188
189 if _is_url(filepath_or_buffer):
--> 190 req = _urlopen(filepath_or_buffer)
191 content_encoding = req.headers.get('Content-Encoding', None)
192 if content_encoding == 'gzip':
/usr/lib64/python3.4/urllib/request.py in urlopen(url, data, timeout, cafile, capath, cadefault, context)
159 else:
160 opener = _opener
--> 161 return opener.open(url, data, timeout)
162
163 def install_opener(opener):
/usr/lib64/python3.4/urllib/request.py in open(self, fullurl, data, timeout)
462 req = meth(req)
463
--> 464 response = self._open(req, data)
465
466 # post-process response
/usr/lib64/python3.4/urllib/request.py in _open(self, req, data)
480 protocol = req.type
481 result = self._call_chain(self.handle_open, protocol, protocol +
--> 482 '_open', req)
483 if result:
484 return result
/usr/lib64/python3.4/urllib/request.py in _call_chain(self, chain, kind, meth_name, *args)
440 for handler in handlers:
441 func = getattr(handler, meth_name)
--> 442 result = func(*args)
443 if result is not None:
444 return result
/usr/lib64/python3.4/urllib/request.py in https_open(self, req)
1224 def https_open(self, req):
1225 return self.do_open(http.client.HTTPSConnection, req,
-> 1226 context=self._context, check_hostname=self._check_hostname)
1227
1228 https_request = AbstractHTTPHandler.do_request_
/usr/lib64/python3.4/urllib/request.py in do_open(self, http_class, req, **http_conn_args)
1183 h.request(req.get_method(), req.selector, req.data, headers)
1184 except OSError as err: # timeout error
-> 1185 raise URLError(err)
1186 r = h.getresponse()
1187 except:
URLError: <urlopen error [Errno -3] Temporary failure in name resolution>
In [5]: tips.head()