The NoSQL Store everyone ignores: PostgreSQL

Stephan Hochdörfer // 24.01.2014

About me

  • Stephan Hochdörfer
  • Head of IT, bitExpert AG (Mannheim, Germany)
  • S.Hochdoerfer@bitExpert.de
  • @shochdoerfer

  • #PHP, #DevOps, #Automation

NoSQL?




« [...] provides a mechanism for storage and retrieval of data that is modeled in means other than the tabular relations used in relational databases. » - Wikipedia

NoSQL Databases

NoSQL Database Types


  • Graph Database
  • Object Database
  • Column Store
  • Key-Value Store
  • Document Store

Why NoSQL?



  • Can deal "better" with
    Big Data
  • Frequent schema
    changes possible
  • Scalability due
    to redundancy

PostgreSQL




« [...] is an open source object-relational database management system (ORDBMS) with an emphasis on extensibility and standards compliance. » - Wikipedia

Schemaless?


  • Schema changes
    can be expensive
  • Can speed up
    Development
  • Can help with
    Continuous Delivery
  • Schemaless does not
    mean unstructured!

Postgres Datatypes


"Schemaless" datatypes:

  • Array
  • hstore
  • JSON
  • XML

Array




« [...] PostgreSQL allows columns of a table to be
defined as variable-length multidimensional arrays. »
- PostgreSQL documentation

Create array column


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  integer[]
);

Create array column


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  integer ARRAY[4]
);

Create array column


CREATE TABLE tictactoe (
  squares   integer[3][3]
);

Insert an array


INSERT INTO sales_stats
    VALUES (
    'Product A',
    '{10000, 10000, 10000, 10000}');

INSERT INTO sales_stats
    VALUES (
    'Product B',
    '{20000, 25000, 25000, 25000}');

Insert an array


INSERT INTO sales_stats
    VALUES (
    'Product A',
    ARRAY[10000, 10000, 10000, 10000]);

INSERT INTO sales_stats
    VALUES (
    'Product B',
    ARRAY[20000, 25000, 25000, 25000]);

Query arrays


SELECT product FROM sales_stats WHERE sales_by_quarter[1] <> 
    sales_by_quarter[2];
 product
----------
 Product B
(1 row)

Query arrays


SELECT sales_by_quarter[3] FROM sales_stats;
 sales_by_quarter
------------------
          10000
          25000
(2 rows)

Query arrays


SELECT sales_by_quarter[5] FROM sales_stats;
 sales_by_quarter
------------------
          
          
(2 rows)

Query arrays


SELECT ARRAY(SELECT product FROM sales_stats);
                                 array
-----------------------------------------------------------------
 {Product A, Product B}
(1 row)

Modify an array


UPDATE sales_stats 
    SET sales_by_quarter = '{25000,25000,27000,27000}' 
    WHERE product = 'Product B';

Modify an array


UPDATE sales_stats 
  SET sales_by_quarter[4] = 27000 
  WHERE product = 'Product B';

Modify an array


UPDATE sales_stats 
  SET sales_by_quarter[6] = 35000 
  WHERE product = 'Product B';
SELECT * FROM sales_stats WHERE product = 'Product B';
  product  |           sales_by_quarter
-----------+--------------------------------------
 Product B | {20000,25000,25000,25000,NULL,35000}
(1 row)

Modify an array


UPDATE sales_stats 
SET sales_by_quarter[1:2] = '{27000,27000}' 
WHERE product = 'Product B';

Filtering arrays


SELECT * FROM sales_stats WHERE 
    sales_by_quarter[1] = 10000 OR
    sales_by_quarter[2] = 10000 OR
    sales_by_quarter[3] = 10000 OR
    sales_by_quarter[4] = 10000;
  product  |     sales_by_quarter
-----------+---------------------------
 Product A | {10000,10000,10000,10000}
(1 row)

Filtering arrays


SELECT * FROM sales_stats WHERE 10000 = ANY (sales_by_quarter);
  product  |     sales_by_quarter
-----------+---------------------------
 Product A | {10000,10000,10000,10000}
(1 row)

Filtering arrays


SELECT * FROM sales_stats WHERE 10000 = ALL (sales_by_quarter);
  product  |     sales_by_quarter
-----------+---------------------------
 Product A | {10000,10000,10000,10000}
(1 row)

Filtering arrays


SELECT * FROM sales_stats WHERE sales_by_quarter 
  && ARRAY[10000, 20000];
  product  |           sales_by_quarter
-----------+--------------------------------------
 Product A | {10000,10000,10000,10000}
 Product B | {20000,25000,25000,25000}
(2 rows)

Array functions


  • array_dims()
  • array_upper()
  • array_lower()
  • array_length()
  • array_prepend()
  • array_append()
  • ...

hstore




« [...] data type for storing sets of key/value pairs within a single PostgreSQL value. [...] Keys and values are simply text strings. » - PostgreSQL documentation

Install hstore extension


CREATE EXTENSION hstore;

Create hstore column


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  hstore
);

Insert hstore data


INSERT INTO sales_stats
    VALUES (
    'Product A',
    'q1 => 10000, q2 => 10000, q3 => 10000, q4 => 10000'::hstore);

INSERT INTO sales_stats
    VALUES (
    'Product B',
    'q1 => 20000, q2 => 25000, q3 => 25000, q4 => 25000'::hstore);

Query hstore


SELECT product FROM sales_stats 
  WHERE (sales_by_quarter->'q1')::int <> 
        (sales_by_quarter->'q2')::int;
product
----------
 Product B
(1 row)

Query hstore


SELECT sales_by_quarter->'q3' FROM sales_stats;
   ?column?
------------------
          10000
          25000
(2 rows)

Query hstore


SELECT sales_by_quarter->'q5' FROM sales_stats;
    ?column?
------------------
          
          
(2 rows)

Query hstore


SELECT product FROM sales_stats WHERE sales_by_quarter ? 'q4';
  product
-----------
 Product A
 Product B
(2 rows)

Query hstore


SELECT product FROM sales_stats WHERE sales_by_quarter ? 'q5';
 product
---------
(0 rows)

Query hstore


SELECT product FROM sales_stats WHERE sales_by_quarter @> 
  'q1 => 10000';
  product
-----------
 Product A
(1 row)

Query hstore


SELECT DISTINCT UNNEST(AKEYS(sales_by_quarter)) as keys 
FROM sales_stats;
 keys
------
 q1
 q3
 q4
 q2
(4 rows)

Modify hstore column


UPDATE sales_stats 
SET sales_by_quarter = 'q1 => 20000, q2 => 25000, q3 => 25000, q4 => 25000'::hstore 
WHERE product = 'Product B';

Modify hstore column


UPDATE sales_stats 
SET sales_by_quarter = sales_by_quarter || 'q4=>27000' 
WHERE product = 'Product B';

Modify hstore column


UPDATE sales_stats 
SET sales_by_quarter = sales_by_quarter::hstore - 'q4'::text 
WHERE product = 'Product B';

hstore functions


  • akeys()
  • avals()
  • hstore_to_array()
  • hstore_to_json()
  • ...

hstore in Postgres 9.4


  • Types: numeric, boolean,
    String, NULL
  • Optimize JSON conversion
  • Optimize Index handling

JSON




« [...] the json data type has the advantage of checking
that each stored value is a valid JSON value. »
- PostgreSQL documentation

Create JSON column


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  json
);

Insert JSON object


INSERT INTO sales_stats
    VALUES (
    'Product A',
    '{ "q1": 10000, "q2": 10000, "q3": 10000, "q4": 10000 }');

INSERT INTO sales_stats
    VALUES (
    'Product B',
    '{ "q1": 20000, "q2": 25000, "q3": 25000, "q4": 25000}');

Insert JSON object


INSERT INTO sales_stats
    VALUES (
    'Product C',
    '{ "q1": {"de": 5000, "uk": 1500}, 
       "q2": 10000, 
       "q3": 10000, 
       "q4": 10000 
     }');

Query JSON column


SELECT product FROM sales_stats 
  WHERE (sales_by_quarter->>'q1')::int <> 
        (sales_by_quarter->>'q2')::int;
product
----------
 Product B
(1 row)

Query JSON column


SELECT sales_by_quarter->>'q3' FROM sales_stats;
 sales_by_quarter
------------------
          10000
          25000
(2 rows)

Query JSON column


SELECT sales_by_quarter->>'q5' FROM sales_stats;
 sales_by_quarter
------------------
          
          
(2 rows)

Query JSON column


SELECT * FROM json_each((
  SELECT sales_by_quarter FROM sales_stats 
  WHERE product = 'Product A'
));
 key | value
-----+-------
 q1  | 20000
 q2  | 25000
 q3  | 25000
 q4  | 25000
(4 rows)

Modify JSON column


UPDATE sales_stats 
SET sales_by_quarter = '{ "q1": 20000, "q2": 25000, "q3": 25000, "q4": 25000}' 
WHERE product = 'Product B';

Filtering JSON columns


SELECT * FROM sales_stats WHERE 
    (sales_by_quarter->>'q1')::int = 10000 OR
    (sales_by_quarter->>'q2')::int = 10000 OR
    (sales_by_quarter->>'q3')::int = 10000 OR
    (sales_by_quarter->>'q4')::int = 10000;
  product  |                    sales_by_quarter
-----------+-----------------------------------------------------
 Product A | {"q1": 10000, "q2": 10000, "q3": 10000, "q4": 10000}
(1 row)

Index JSON column


CREATE UNIQUE INDEX sales_stats_unique_q1 
ON sales_stats((sales_by_quarter->>'q1'));
INSERT INTO sales_stats
    VALUES (
    'Product C',
    '{ "q1": 10000, "q2": 10000, "q3": 10000, "q4": 10000 }');
ERROR:  duplicate key value violates unique constraint "sales_stats_unique_q1"
DETAIL:  Key ((sales_by_quarter ->> 'q1'::text))=(10000) already exists.

JSON functions


  • array_to_json()
  • row_to_json()
  • json_array_length()
  • json_object_keys()
  • ...

Micro-JSON Webservice


<?php
$db = new PDO('pgsql:user=myuser dbname=mydb password=mypwd');

$stmt = $db->prepare('select sales_by_quarter from sales_stats WHERE product LIKE :product');
$stmt->bindParam(':product', $_GET['product'], PDO::PARAM_STR);
$stmt->execute();

echo $stmt->fetch(PDO::FETCH_COLUMN, 0);
{ "q1": 20000, "q2": 25000, "q3": 25000, "q4": 25000}

BSON




« [...] is a document serialization format used by MongoDB. This extension allows for smoother migration path from MongoDB to PostgreSQL. » - postgresbson

Compile BSON extension




git clone https://github.com/maciekgajewski/postgresbson.git
mkdir postgresbson-build
cd postgresbson-build
cmake ../postgresbson
make
make install
make test

Create BSON column


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  BSON
);

XML




« [...] it checks the input values for well-formedness, and there are support functions to perform type-safe operations on it. » - PostgreSQL documentation

Create XML column


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  XML
);

Insert XML document


INSERT INTO sales_stats
    VALUES (
    'Product A',
    XMLPARSE (DOCUMENT '<?xml version="1.0"?>
      <qs>
	<q1>10000</q1>
	<q2>10000</q2>
	<q3>10000</q3>
	<q4>10000</q4>
     </qs>'));

Insert XML document


INSERT INTO sales_stats
    VALUES (
    'Product A',
    XMLPARSE (DOCUMENT '<?xml version="1.0"?>
      <qs>
	<q i="1" value="10000" />
	<q i="2" value="10000" />
	<q i="3" value="10000" />
	<q i="4" value="10000" />
     </qs>'));

Insert XML document


INSERT INTO sales_stats
    VALUES (
    'Product A',
    '<?xml version="1.0"?>
      <qs>
	<q1>10000</q1>
	<q2>10000</q2>
	<q3>10000</q3>
	<q4>10000</q4>
     </qs>'::xml);

Query XML document


SELECT xpath('/qs/q1/text()', sales_by_quarter) FROM sales_stats;
  ?column?
-----------
 10000
(1 row)

Modify XML document


UPDATE sales_stats 
SET sales_by_quarter = '<?xml version="1.0"?><qs><q1>10000</q1><q2>10000</q2><q3>10000</q3><q4>10000</q4></qs>'::xml 
WHERE product = 'Product A';

Search in XML document


SELECT product FROM sales_stats 
WHERE CAST(xpath('/qs/q1/text()', sales_by_quarter) AS TEXT[]) @> '{10000}'::TEXT[];
  product
-----------
 Product A
(1 row)

Search in XML document


SELECT product FROM sales_stats 
WHERE xpath_exists('/qs/q1/text()', sales_by_quarter);
  product
-----------
 Product A
(1 row)

Create index for XML nodes


CREATE INDEX q1_idx ON sales_stats USING 
GIN(CAST(xpath('/qs/q1/text()', sales_by_quarter) AS TEXT[]));

Create XML export


SELECT query_to_xml('SELECT product FROM sales_stats', true, false, '') as product;
       product
------------------------------------------------------------
<table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <row>
    <product>Product A</product>
  </row>

  <row>
    <product>Product B</product>
  </row>
</table>

(1 row)

XML functions


  • xmlconcat()
  • xmlforest()
  • xmlagg()
  • xml_is_well_formed()
  • query_to_xml()
  • xpath()
  • ...

Partition database tables




« [...] Partitioning refers to splitting what is logically one large table into smaller physical pieces. » - PostgreSQL documentation

Partition database tables


CREATE TABLE sales_stats (
    year              int,
    product           text,
    sales_by_quarter  hstore
);
CREATE TABLE sales_stats_2012(
    CHECK ( year = 2012 )
) INHERITS (sales_stats);

CREATE TABLE sales_stats_2011(
    CHECK ( year = 2011 )
) INHERITS (sales_stats);

Partition database tables


CREATE OR REPLACE FUNCTION sales_stats_part()
RETURNS TRIGGER AS 
$BODY$
DECLARE
  _new_time int;
  _tablename text;
  _year text;
  _result record;
BEGIN
  _new_time := ((NEW."time"/86400)::int)*86400;
  _year := to_char(to_timestamp(_new_time), 'YYYY');
  _tablename := 'sales_stats_'||_year;
 
  PERFORM 1
  FROM   pg_catalog.pg_class c
  JOIN   pg_catalog.pg_namespace n ON n.oid = c.relnamespace
  WHERE  c.relkind = 'r' AND c.relname = _tablename;

Partition database tables


  IF NOT FOUND THEN
    EXECUTE 'CREATE TABLE ' || quote_ident(_tablename) || ' (
      CHECK ( year = ' || quote_literal(_year) || '))
    ) INHERITS (sales_stats)';
  END IF;
 
  EXECUTE 'INSERT INTO ' || quote_ident(_tablename) || ' VALUES ($1.*)' USING NEW;
  RETURN NULL;
END;
$BODY$
LANGUAGE plpgsql;
CREATE TRIGGER sales_stats_trigger
BEFORE INSERT ON sales_stats
FOR EACH ROW EXECUTE PROCEDURE sales_stats_part();

To infinity and beyond!



  • Foreign Data Wrappers
  • Conditional Indexes
  • Range Datentype
  • Materialized views
  • ...

Peaceful coexistence...



  • Store documents in a
    document-oriented fashion
  • Store relational data
    in a relational schema

...but choose wisely



Large ecosystem

PostgreSQL users








Thank you!







Do not forget to rate the talk:
https://joind.in/10293