PostgreSQL: Die NoSQL Datenbank

Stephan Hochdörfer // 08.11.2013

Über mich

  • Stephan Hochdörfer
  • Head of IT, bitExpert AG (Mannheim)
  • S.Hochdoerfer@bitExpert.de
  • @shochdoerfer

  • #PHP, #DevOps, #Automation

NoSQL?




« [...] bezeichnet Datenbanken, die einen nicht-relationalen Ansatz verfolgen und damit mit der langen Geschichte von relationalen Datenbanken brechen. » - Wikipedia

NoSQL Datenbanken

NoSQL Klassifizierung


  • Graphdatenbanken
  • Objektdatenbanken
  • Spaltenorientierte
    Datenbanken
  • Key-Value Datenbanken
  • Dokumentenorientierte
    Datenbanken

Warum NoSQL?


  • "Besserer" Umgang mit
    großen Datenmengen
  • Häufige Schemaänderungen
    unproblematisch
  • Verteilte Systeme,
    redundante Datenhaltung
  • bessere Skalierungs-
    möglichkeiten

PostgreSQL




« [...] ist ein freies, objektrelationales Datenbankmanagementsystem. » - Wikipedia

Schema vs. Schemafrei


  • Schemaänderungen
    können teuer sein
  • Schemafrei heißt nicht
    unstrukturiert
  • Schemafreiheit begünstigt
    Continuous Delivery
  • Schemafreiheit
    beschleunigt Entwicklung

Postgres Datentypen


"Schemafreie" Datentypen:

  • Array
  • hstore
  • JSON
  • XML

Array



« [...] PostgreSQL allows columns of a table to be defined as variable-length multidimensional arrays. »
- PostgreSQL documentation

Array Spalte anlegen


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  integer[]
);

Array Spalte anlegen


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  integer ARRAY[4]
);

Array Spalte anlegen


CREATE TABLE tictactoe (
  squares   integer[3][3]
);

Insert eines Arrays


INSERT INTO sales_stats
    VALUES (
    'Produkt A',
    '{10000, 10000, 10000, 10000}');

INSERT INTO sales_stats
    VALUES (
    'Produkt B',
    '{20000, 25000, 25000, 25000}');

Insert eines Arrays


INSERT INTO sales_stats
    VALUES (
    'Produkt A',
    ARRAY[10000, 10000, 10000, 10000]);

INSERT INTO sales_stats
    VALUES (
    'Produkt B',
    ARRAY[20000, 25000, 25000, 25000]);

Arrays abfragen


SELECT product FROM sales_stats WHERE sales_by_quarter[1] <> 
    sales_by_quarter[2];
 product
----------
 Produkt B
(1 row)

Arrays abfragen


SELECT sales_by_quarter[3] FROM sales_stats;
 sales_by_quarter
------------------
          10000
          25000
(2 rows)

Arrays abfragen


SELECT sales_by_quarter[5] FROM sales_stats;
 sales_by_quarter
------------------
          
          
(2 rows)

Arrays abfragen


SELECT ARRAY(SELECT product FROM sales_stats);
                                 array
-----------------------------------------------------------------
 {Produkt A, Produkt B}
(1 row)

Arrays modifizieren


UPDATE sales_stats 
    SET sales_by_quarter = '{25000,25000,27000,27000}' 
    WHERE product = 'Produkt B';

Arrays modifizieren


UPDATE sales_stats 
  SET sales_by_quarter[4] = 27000 
  WHERE product = 'Produkt B';

Arrays modifizieren


UPDATE sales_stats 
  SET sales_by_quarter[6] = 35000 
  WHERE product = 'Produkt B';
SELECT * FROM sales_stats WHERE product = 'Produkt B';
  product  |           sales_by_quarter
-----------+--------------------------------------
 Produkt B | {20000,25000,25000,25000,NULL,35000}
(1 row)

Arrays modifizieren


UPDATE sales_stats 
SET sales_by_quarter[1:2] = '{27000,27000}' 
WHERE product = 'Produkt B';

Arrays durchsuchen


SELECT * FROM sales_stats WHERE 
    sales_by_quarter[1] = 10000 OR
    sales_by_quarter[2] = 10000 OR
    sales_by_quarter[3] = 10000 OR
    sales_by_quarter[4] = 10000;
  product  |     sales_by_quarter
-----------+---------------------------
 Produkt A | {10000,10000,10000,10000}
(1 row)

Arrays durchsuchen


SELECT * FROM sales_stats WHERE 10000 = ANY (sales_by_quarter);
  product  |     sales_by_quarter
-----------+---------------------------
 Produkt A | {10000,10000,10000,10000}
(1 row)

Arrays durchsuchen


SELECT * FROM sales_stats WHERE 10000 = ALL (sales_by_quarter);
  product  |     sales_by_quarter
-----------+---------------------------
 Produkt A | {10000,10000,10000,10000}
(1 row)

Arrays durchsuchen


SELECT * FROM sales_stats WHERE sales_by_quarter 
  && ARRAY[10000, 20000];
  product  |           sales_by_quarter
-----------+--------------------------------------
 Produkt A | {10000,10000,10000,10000}
 Produkt B | {20000,25000,25000,25000}
(2 rows)

Array Funktionen


  • array_dims()
  • array_upper()
  • array_lower()
  • array_length()
  • array_prepend()
  • array_append()
  • ...

hstore




« [...] data type for storing sets of key/value pairs within a single PostgreSQL value. [...] Keys and values are simply text strings. » - PostgreSQL documentation

hstore Extension installieren


CREATE EXTENSION hstore;

hstore Spalte anlegen


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  hstore
);

Insert mit hstore


INSERT INTO sales_stats
    VALUES (
    'Produkt A',
    'q1 => 10000, q2 => 10000, q3 => 10000, q4 => 10000'::hstore);

INSERT INTO sales_stats
    VALUES (
    'Produkt B',
    'q1 => 20000, q2 => 25000, q3 => 25000, q4 => 25000'::hstore);

hstore abfragen


SELECT product FROM sales_stats 
  WHERE (sales_by_quarter->'q1')::int <> 
        (sales_by_quarter->'q2')::int;
product
----------
 Produkt B
(1 row)

hstore abfragen


SELECT sales_by_quarter->'q3' FROM sales_stats;
   ?column?
------------------
          10000
          25000
(2 rows)

hstore abfragen


SELECT sales_by_quarter->'q5' FROM sales_stats;
    ?column?
------------------
          
          
(2 rows)

hstore abfragen


SELECT product FROM sales_stats WHERE sales_by_quarter ? 'q4';
  product
-----------
 Produkt A
 Produkt B
(2 rows)

hstore abfragen


SELECT product FROM sales_stats WHERE sales_by_quarter ? 'q5';
 product
---------
(0 rows)

hstore abfragen


SELECT product FROM sales_stats WHERE sales_by_quarter @> 
  'q1 => 10000';
  product
-----------
 Produkt A
(1 row)

hstore abfragen


SELECT DISTINCT UNNEST(AKEYS(sales_by_quarter)) as keys 
FROM sales_stats;
 keys
------
 q1
 q3
 q4
 q2
(4 rows)

hstore modifizieren


UPDATE sales_stats 
SET sales_by_quarter = 'q1 => 20000, q2 => 25000, q3 => 25000, q4 => 25000'::hstore 
WHERE product = 'Produkt B';

hstore modifizieren


UPDATE sales_stats 
SET sales_by_quarter = sales_by_quarter || 'q4=>27000' 
WHERE product = 'Produkt B';

hstore modifizieren


UPDATE sales_stats 
SET sales_by_quarter = sales_by_quarter::hstore - 'q4'::text 
WHERE product = 'Produkt B';

hstore Funktionen


  • akeys()
  • avals()
  • hstore_to_array()
  • hstore_to_json()
  • ...

hstore Ausblick (9.4)


  • Typen: numeric, boolean,
    String, NULL
  • JSON Konvertierung
    optimieren
  • Index-Handling optimieren

Use-case: Auditing changes


CREATE TABLE audit
(
    change_date timestamptz default now(),
    before                          hstore,
    after                           hstore
);
create function audit()
  returns trigger
  language plpgsql
as $$
begin
  INSERT INTO audit(before, after)
       SELECT hstore(old), hstore(new);
  return new;
end;
$$;

Use-case: Auditing changes


create trigger audit
      after update on example
          for each row
execute procedure audit();
select change_date, after - before as diff from audit;

          change_date          |   diff    
-------------------------------+-----------
 2013-11-08 09:29:19.808217+02 | "f1"=>"b"
 2013-11-08 09:29:19.808217+02 | "f2"=>"c"

(2 rows)

Quelle: Auditing Changes with Hstore

Mit hstore in PHP arbeiten


function array_to_hstore(array $data) {
	$return = array();
	foreach($data as $key => $value) {
		if(is_array($key) || is_array($value)) {
			continue;
		}
		$return[] = '"'.addslashes($key).'"=>"'.
		    addslashes($value).'"';
	}
	return implode(',', $return);
}

Mit hstore in PHP arbeiten


function hstore_to_array($data) {
	$return = array();
	$data   = str_getcsv($data, ',');
	foreach($data as $item) {
		$item    = explode('=>', $item, 2);
		$item[0] = str_replace('"', '', $item[0]);
		if('' == $item[0]) {
			continue;
		}
		$item[1] = str_replace('"', '', $item[1]);
		if('' == $item[1]) {
			$item[1] = null;
		}
		$return[$item[0]] = $item[1];
	}
	return $return;
}

JSON




« [...] the json data type has the advantage of checking
that each stored value is a valid JSON value. »
- PostgreSQL documentation

JSON Spalte anlegen


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  json
);

Insert eines JSON Objekts


INSERT INTO sales_stats
    VALUES (
    'Produkt A',
    '{ "q1": 10000, "q2": 10000, "q3": 10000, "q4": 10000 }');

INSERT INTO sales_stats
    VALUES (
    'Produkt B',
    '{ "q1": 20000, "q2": 25000, "q3": 25000, "q4": 25000}');

Insert eines JSON Objekts


INSERT INTO sales_stats
    VALUES (
    'Produkt C',
    '{ "q1": {"de": 5000, "uk": 1500}, 
       "q2": 10000, 
       "q3": 10000, 
       "q4": 10000 
     }');

JSON Objekt abfragen


SELECT product FROM sales_stats 
  WHERE (sales_by_quarter->>'q1')::int <> 
        (sales_by_quarter->>'q2')::int;
product
----------
 Produkt B
(1 row)

JSON Objekt abfragen


SELECT sales_by_quarter->>'q3' FROM sales_stats;
 sales_by_quarter
------------------
          10000
          25000
(2 rows)

JSON Objekt abfragen


SELECT sales_by_quarter->>'q5' FROM sales_stats;
 sales_by_quarter
------------------
          
          
(2 rows)

JSON Objekt abfragen


SELECT * FROM json_each((
  SELECT sales_by_quarter FROM sales_stats 
  WHERE product = 'Produkt A'
));
 key | value
-----+-------
 q1  | 20000
 q2  | 25000
 q3  | 25000
 q4  | 25000
(4 rows)

JSON Objekt modifizieren


UPDATE sales_stats 
SET sales_by_quarter = '{ "q1": 20000, "q2": 25000, "q3": 25000, "q4": 25000}' 
WHERE product = 'Produkt B';

JSON Objekte durchsuchen


SELECT * FROM sales_stats WHERE 
    (sales_by_quarter->>'q1')::int = 10000 OR
    (sales_by_quarter->>'q2')::int = 10000 OR
    (sales_by_quarter->>'q3')::int = 10000 OR
    (sales_by_quarter->>'q4')::int = 10000;
  product  |                    sales_by_quarter
-----------+-----------------------------------------------------
 Produkt A | {"q1": 10000, "q2": 10000, "q3": 10000, "q4": 10000}
(1 row)

JSON Objekte indexieren


CREATE UNIQUE INDEX sales_stats_unique_q1 
ON sales_stats((sales_by_quarter->>'q1'));
INSERT INTO sales_stats
    VALUES (
    'Produkt C',
    '{ "q1": 10000, "q2": 10000, "q3": 10000, "q4": 10000 }');
ERROR:  duplicate key value violates unique constraint "sales_stats_unique_q1"
DETAIL:  Key ((sales_by_quarter ->> 'q1'::text))=(10000) already exists.

JSON Funktionen


  • array_to_json()
  • row_to_json()
  • json_array_length()
  • json_object_keys()
  • ...

Micro-JSON Webservice


<?php
$db = new PDO('pgsql:user=myuser dbname=mydb password=mypwd');

$stmt = $db->prepare('select sales_by_quarter from sales_stats WHERE product LIKE :product');
$stmt->bindParam(':product', $_GET['product'], PDO::PARAM_STR);
$stmt->execute();

echo $stmt->fetch(PDO::FETCH_COLUMN, 0);
{ "q1": 20000, "q2": 25000, "q3": 25000, "q4": 25000}

XML




« [...] it checks the input values for well-formedness, and there are support functions to perform type-safe operations on it. » - PostgreSQL documentation

XML Spalte anlegen


CREATE TABLE sales_stats (
    product           text,
    sales_by_quarter  XML
);

Insert eines XML Dokuments


INSERT INTO sales_stats
    VALUES (
    'Produkt A',
    XMLPARSE (DOCUMENT '<?xml version="1.0"?>
      <qs>
	<q1>10000</q1>
	<q2>10000</q2>
	<q3>10000</q3>
	<q4>10000</q4>
     </qs>'));

Insert eines XML Dokuments


INSERT INTO sales_stats
    VALUES (
    'Produkt A',
    XMLPARSE (DOCUMENT '<?xml version="1.0"?>
      <qs>
	<q i="1" value="10000" />
	<q i="2" value="10000" />
	<q i="3" value="10000" />
	<q i="4" value="10000" />
     </qs>'));

Insert eines XML Dokuments


INSERT INTO sales_stats
    VALUES (
    'Produkt A',
    '<?xml version="1.0"?>
      <qs>
	<q1>10000</q1>
	<q2>10000</q2>
	<q3>10000</q3>
	<q4>10000</q4>
     </qs>'::xml);

XML Daten abfragen


SELECT xpath('/qs/q1/text()', sales_by_quarter) FROM sales_stats;
  ?column?
-----------
 10000
(1 row)

XML Dokument modifizieren


UPDATE sales_stats 
SET sales_by_quarter = '<?xml version="1.0"?><qs><q1>10000</q1><q2>10000</q2><q3>10000</q3><q4>10000</q4></qs>'::xml 
WHERE product = 'Produkt A';

XML Dokument durchsuchen


SELECT product FROM sales_stats 
WHERE CAST(xpath('/qs/q1/text()', sales_by_quarter) AS TEXT[]) @> '{10000}'::TEXT[];
  product
-----------
 Produkt A
(1 row)

XML Dokument durchsuchen


SELECT product FROM sales_stats 
WHERE xpath_exists('/qs/q1/text()', sales_by_quarter);
  product
-----------
 Produkt A
(1 row)

XML Daten indexieren


CREATE INDEX q1_idx ON sales_stats USING 
GIN(CAST(xpath('/qs/q1/text()', sales_by_quarter) AS TEXT[]));

XML Export erzeugen


SELECT query_to_xml('SELECT product FROM sales_stats', true, false, '') as product;
       product
------------------------------------------------------------
<table xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
  <row>
    <product>Produkt A</product>
  </row>

  <row>
    <product>Produkt B</product>
  </row>
</table>

(1 row)

XML Funktionen


  • xmlconcat()
  • xmlforest()
  • xmlagg()
  • xml_is_well_formed()
  • query_to_xml()
  • xpath()
  • ...

Tabellenpartitionierung




« [...] Partitioning refers to splitting what is logically one large table into smaller physical pieces. » - PostgreSQL documentation

Tabellenpartitionierung


CREATE TABLE sales_stats (
    year              int,
    product           text,
    sales_by_quarter  hstore
);
CREATE TABLE sales_stats_2012(
    CHECK ( year = 2012 )
) INHERITS (sales_stats);

CREATE TABLE sales_stats_2011(
    CHECK ( year = 2011 )
) INHERITS (sales_stats);

Tabellenpartitionierung


CREATE OR REPLACE FUNCTION sales_stats_part()
RETURNS TRIGGER AS 
$BODY$
DECLARE
  _new_time int;
  _tablename text;
  _year text;
  _result record;
BEGIN
  _new_time := ((NEW."time"/86400)::int)*86400;
  _year := to_char(to_timestamp(_new_time), 'YYYY');
  _tablename := 'sales_stats_'||_year;
 
  PERFORM 1
  FROM   pg_catalog.pg_class c
  JOIN   pg_catalog.pg_namespace n ON n.oid = c.relnamespace
  WHERE  c.relkind = 'r' AND c.relname = _tablename;

Tabellenpartitionierung


  IF NOT FOUND THEN
    EXECUTE 'CREATE TABLE ' || quote_ident(_tablename) || ' (
      CHECK ( year = ' || quote_literal(_year) || '))
    ) INHERITS (sales_stats)';
  END IF;
 
  EXECUTE 'INSERT INTO ' || quote_ident(_tablename) || ' VALUES ($1.*)' USING NEW;
  RETURN NULL;
END;
$BODY$
LANGUAGE plpgsql;
CREATE TRIGGER sales_stats_trigger
BEFORE INSERT ON sales_stats
FOR EACH ROW EXECUTE PROCEDURE sales_stats_part();

...und darüber hinaus



  • Foreign Data Wrappers
  • Conditional Indexes
  • Range Datentyp
  • Materialized views
  • ...

Friedliche Co-Existenz



  • Speichere Dokumente
    dokumentenorientiert
  • Speichere relationale Daten
    relational


Unterscheide je nach Anwendungsfall

Umfangreiches Ökosystem

Große Nutzerbasis








Vielen Dank für die Aufmerksamkeit!