It this post I’d like to share experience in PostgreSQL, XPath and XML processing.
Let’s assume that we’ve got following xml:
<artists> <artist name="John Doe"/> <artist name="Edward Poe"/> <artist name="Mark The Great"/> </artists>
Our first task is to extract all artists:
create or replace function xml_test(xmlStr character varying)
returns integer as
$BODY$
declare
artistsList text[];
xml_data xml;
currentArtist text;
i int;
begin
xml_data = xmlStr::xml;
artistsList := (select xpath('//artists/artist/@name',xml_data)::text[] x);
for i in 1..array_upper(artistsList,1) loop
raise notice 'Artist: %', artistsList[i];
end loop;
return 0;
end;
$BODY$
LANGUAGE 'plpgsql' VOLATILE
select xml_test(
'
<artists>
<artist name="John Doe"/>
<artist name="Edward Poe"/>
<artist name="Mark The Great"/>
</artists>
')
Here is the output:
NOTICE: Artist: John Doe NOTICE: Artist: Edward Poe NOTICE: Artist: Mark The Great Time:14 ms. 1 row retrieved.
Now let’s extend our xml:
<artists>
<artist name="John Doe">
<albums>
<album name="First"/>
<album name="Second"/>
<album name="Third"/>
</albums>
</artist>
<artist name="Edward Poe"/>
<artist name="Mark The Great"/>
</artists>
What we want to extract all albums for given artist. Here we go:
create or replace function xml_test2(xmlStr character varying, artistName character varying)
returns integer as
$BODY$
declare
albumsList text[];
xml_data xml;
xpathQuery character varying;
i int;
begin
xml_data = xmlStr::xml;
xpathQuery = '//artists/artist[@name="' || artistName || '"]/albums/album/@name';
albumsList := (select xpath(xpathQuery,xml_data)::text[] x);
for i in 1..array_upper(albumsList,1) loop
raise notice 'Album: %', albumsList[i];
end loop;
return 0;
end;
$BODY$
LANGUAGE 'plpgsql' VOLATILE
select xml_test2(
'
<artists>
<artist name="John Doe">
<albums>
<album name="First"/>
<album name="Second"/>
<album name="Third"/>
</albums>
</artist>
<artist name="Edward Poe"/>
<artist name="Mark The Great"/>
</artists>
', 'John Doe')
And here is the output:
NOTICE: Album: First NOTICE: Album: Second NOTICE: Album: Third Time:13 ms. 1 row retrieved.
Finally lets add genres for albums and make them duplicating:
<artists>
<artist name="John Doe">
<albums>
<album name="First">
<genres>
<genre name="Jazz"/>
</genres>
</album>
<album name="Second">
<genres>
<genre name="Jazz"/>
<genre name="Blues"/>
</genres>
</album>
<album name="Third">
<genres>
<genre name="Jazz"/>
<genre name="New Age"/>
</genres>
</album>
</albums>
</artist>
<artist name="Edward Poe">
<albums>
<album name="First">
<genres>
<genre name="Rock"/>
</genres>
</album>
<album name="Second">
<genres>
<genre name="Rock"/>
</genres>
</album>
<album name="Third">
<genres>
<genre name="Hard Rock"/>
</genres>
</album>
</albums>
</artist>
<artist name="Mark The Great">
<albums>
<album name="First">
<genres>
<genre name="Pop"/>
<genre name="Jazz"/>
</genres>
</album>
<album name="Second">
<genres>
<genre name="Pop Rock"/>
</genres>
</album>
<album name="Third">
<genres>
<genre name="Metall"/>
</genres>
</album>
</albums>
</artist>
</artists>
This procedure will allow us to process all unique genres for each album of every artist:
create or replace function xml_test3(xmlStr character varying)
returns integer as
$BODY$
declare
genresList text[];
genre text;
xml_data xml;
i int;
begin
xml_data = xmlStr::xml;
genresList := (select xpath('//artists/artist/albums/album/genres/genre/@name',xml_data)::text[] x);
for genre in ( select distinct lower(a) from unnest(genresList) a) loop
raise notice 'Genre: %', initcap(genre);
end loop;
return 0;
end;
$BODY$
LANGUAGE 'plpgsql' VOLATILE
select xml_test3(
'
<artists>
<artist name="John Doe">
<albums>
<album name="First">
<genres>
<genre name="Jazz"/>
</genres>
</album>
<album name="Second">
<genres>
<genre name="Jazz"/>
<genre name="Blues"/>
</genres>
</album>
<album name="Third">
<genres>
<genre name="Jazz"/>
<genre name="New Age"/>
</genres>
</album>
</albums>
</artist>
<artist name="Edward Poe">
<albums>
<album name="First">
<genres>
<genre name="Rock"/>
</genres>
</album>
<album name="Second">
<genres>
<genre name="Rock"/>
</genres>
</album>
<album name="Third">
<genres>
<genre name="Hard Rock"/>
</genres>
</album>
</albums>
</artist>
<artist name="Mark The Great">
<albums>
<album name="First">
<genres>
<genre name="Pop"/>
<genre name="Jazz"/>
</genres>
</album>
<album name="Second">
<genres>
<genre name="Pop Rock"/>
</genres>
</album>
<album name="Third">
<genres>
<genre name="Metall"/>
</genres>
</album>
</albums>
</artist>
</artists>
')
Here is the output
NOTICE: Genre: Rock NOTICE: Genre: Blues NOTICE: Genre: Hard Rock NOTICE: Genre: Jazz NOTICE: Genre: Metall NOTICE: Genre: New Age NOTICE: Genre: Pop Rock NOTICE: Genre: Pop Time:13 ms. 1 row retrieved.
This is small but working cheat-sheet. You can always refer to official documentation though it will take some time to get used to it due to some minor “tricks” which can be achieved only after some practice.
Advertisement