%%% =========================================================================
%%% FLORID Example: INFORMATION INTEGRATION:
%%%
%%% Author: Wolfgang May
%%%
%%% mondial-integra.lpx: integration of XML database sources
%%% =========================================================================
%
?- sys.theOMAccess.debugOn.
?- sys.theOM.eqTraceOn.                  % trace derived equalities
?- sys.prn.style@("bound").              % set output mode
?- sys.prn.annotatedLiterals@("on").
%
% XML sources: each source name carries the URL of its file in @xml.
gs[@xml->"file:/home/may/Mondial/Mondial-Sources/gs.xml" isa url].
cia[@xml->"file:/home/may/Mondial/Mondial-Sources/cia.xml" isa url].
orgs[@xml->"file:/home/may/Mondial/Mondial-Sources/orgs.xml" isa url].
terra[@xml->"file:/home/may/Mondial/Mondial-Sources/terra.xml" isa url].
qiblih[@xml->"file:/home/may/Mondial/Mondial-Sources/qiblih.xml" isa url].
codes[@xml->"file:/home/may/Mondial/Mondial-Sources/codes.xml" isa url].
%
% Target DTD of the integrated result.
mondial.system = "mondial-3.0.dtd".
mondial[@dtd->"file:/home/may/Mondial/Mondial-Sources/mondial-3.0.dtd" isa url].
%
?- sys.strat.doIt.
% Parse every registered XML source and the DTD.
U.parse@(xml,S) :- S[@xml->U].
U.parse@(dtd) :- mondial[@dtd->U].
result isa mondial.
%
% Equate each source name S with the object X reached via Doc[S:S->X] in its
% parsed document (presumably the document's root element -- confirm).
S = X :-
   S[@xml->U],
   U.parse@(xml,S) = Doc,
   Doc[S:S->X].
%
?- sys.strat.doIt.
?- sys.garbageCollection.
% Name aliasing: equate source-qualified names with the unqualified names
% used in the result.
gs:population = population.
gs:country = country.
gs:province = province.
gs:capital = capital.
gs:area = area.
gs:year = year.
gs:name = name.
gs:text() = text().
%
cia:name = name.
cia:borders = border.
cia:ethnicgroups = ethnicgroups.
cia:religions = religions.
cia:languages = languages.
%
cia:area = area.
% Qualified with ':' like every other alias in this list; written with '.'
% it is a path expression on the object cia, not an alias for cia:population.
cia:population = population.
cia:datacode = datacode.
cia:total_area = total_area.
cia:population_growth = population_growth.
cia:infant_mortality = infant_mortality.
cia:gdp_agri = gdp_agri.
cia:gdp_ind = gdp_ind.
cia:gdp_serv = gdp_serv.
cia:gdp_total = gdp_total.
cia:inflation = inflation.
cia:indep_date = indep_date.
cia:government = government.
%
codes:car_code = car_code.
%
qiblih:longitude = longitude.
qiblih:latitude = latitude.
%
% Name aliasing for the organizations source.
orgs:abbrev = abbrev.
orgs:name = name.
orgs:established = established.
%
?- sys.strat.doIt.
%
% Name aliasing for the TERRA source; selected terra:-qualified attributes
% are copied to unqualified attributes per geo-object kind.
terra:located_at = located_at.
terra:mountain = mountain.
terra:height = height.
M[@longitude->L1 and @latitude->L2] :-
   terra/mountain->M[@terra:longitude->L1 and @terra:latitude->L2].
terra:desert = desert.
D[@area->A] :-
   terra/desert->D[@terra:area->A].
terra:island = island.
I[@area->A] :-
   terra/island->I[@terra:area->A].
terra:lake = lake.
L[@area->A] :-
   terra/lake->L[@terra:area->A].
terra:river = river.
terra:to = to.
terra:type = type.
terra:water = water.
terra:length = length.
terra:sea = sea.
terra:depth = depth.
% Copy @terra:name to @name for each kind of geo object.
D[@name->A] :-
   terra/desert->D[@terra:name->A].
D[@name->A] :-
   terra/mountain->D[@terra:name->A].
D[@name->A] :-
   terra/island->D[@terra:name->A].
D[@name->A] :-
   terra/lake->D[@terra:name->A].
D[@name->A] :-
   terra/river->D[@terra:name->A].
D[@name->A] :-
   terra/sea->D[@terra:name->A].
%
?- sys.strat.doIt.
%
result[continent->C] :-
   gs/gs:continent->C.
%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% Manually added cities for individual countries.
C/city[name->"Abidjan"] :-
   gs/gs:country->C[gs:name/gs:text()->"Cote dIvoire"].
C/city[name->"Jeddah"] :-
   gs/country->C[gs:name/gs:text()->"Saudi Arabia"].
% The capital of Western Sahara:
% NOTE(review): elaaiun is a constant, not a variable bound to the city
% derived in the same head -- confirm that this is the intended link.
C/city[name->"El Aaiun"], C[@capital->elaaiun] :-
   gs/gs:country->C[gs:name/gs:text()->"Western Sahara"].
?- sys.strat.doIt.
%
%
% Put every GlobalStatistics city into the result; its @name is the text of
% its name sub-element.
result[city->C[@name->N]] :-
   gs/gs:city->C[name/text()->N].
%
?- sys.strat.doIt.
%
?-sys.echo@("resolving multiple city names:").
%
% Two cities of the same country are equated when one name is the other
% name with " City" appended or "Ciudad de " prepended.
C1 = C2 :-
   gs/gs:city->C1[@gs:country->C and @name->N1],
   strcat(N1," City", N2),
   gs/gs:city->C2[@gs:country->C and @name->N2].
C1 = C2 :-
   gs/gs:city->C1[@gs:country->C and @name->N1],
   strcat("Ciudad de ", N1, N2),
   gs/gs:city->C2[@gs:country->C and @name->N2].
%
?- sys.strat.doIt.
%
?-sys.echo@("CIA-relevant countries are those for which a capital is given:").
%
C[@cia:rel_country->1] :-
   cia/cia:country->C[@cia:capital].
?- sys.strat.doIt.
%
%?- cia/cia:country->C[@cia:name->N and not @cia:rel_country].
%?- cia/cia:country->C[@cia:name->N and @cia:rel_country].
%
%%% =========================================================================
?- sys.strat.doIt.
%
?- sys.echo@("*** FUSING CIA and GlobalStatistics COUNTRIES ***").
?- sys.echo@("fuse two countries if they have the same name").
%
% A CIA-relevant country and a GlobalStatistics country with the same name
% are equated; the fused object becomes a country of the result.
result[country->C1], C1 = C2 :-
   cia/cia:country->C1[@cia:rel_country and @cia:name->N],
   gs/gs:country->C2[gs:name/gs:text()->N].
C[@name->N] :-
   result/country->C/@cia:name->N.
%
% Special case: taken from GlobalStatistics alone and marked CIA-relevant.
result[country->C[@cia:rel_country->1 and @name->"Serbia and Montenegro"]] :-
   gs/gs:country->C[gs:name/gs:text()->"Serbia and Montenegro"].
%
?- sys.strat.doIt.
%
%% "false":
%?- gs/gs:country[@gs:adm_divs and not @cia:rel_country]/@name->N.
%% some gaga-countries:
%?- gs/gs:country[@gs:main_cities and not @cia:rel_country]/@name->N.
%
?- sys.strat.doIt.
%%% =========================================================================
?- sys.echo@("*** Fusing CIA caps to GlobalStatistics cities ***").
%
% Pairs of alternative spellings of the same city name.
% NOTE(review): "s'-Gravenhage" -- verify the spelling against the source data.
city_synonym("Bucharest","Bucuresti").
city_synonym("Warsaw","Warszawa").
city_synonym("New Delhi","Delhi").
city_synonym("Tashkent","Toshkent").
city_synonym("Addis Ababa","Addis Abeba").
city_synonym("La Hague","s'-Gravenhage").
%
?- sys.strat.doIt.
% Two cities of the same country whose names are listed as synonyms are
% equated.
C1 = C2 :-
   gs/gs:city->C1[@gs:country->Ctry and @name->N1],
   city_synonym(N1,N2),
   gs/gs:city->C2[@gs:country->Ctry and @name->N2].
%
%?- sys.echo@("CIA capitals which are not main cities:").
?- sys.strat.doIt.
?- result/country->C[@cia:rel_country and @cia:capital]/@cia:name->N,
   not C/@gs:main_cities.
?- sys.strat.doIt.
%
%
% Provinces and cities enter the result when their country is CIA-relevant.
result[province->P] :-
   gs/gs:province->P/@country[@cia:rel_country].
result[city->Cty] :-
   gs/gs:city->Cty/@country[@cia:rel_country].
%
?- sys.strat.doIt.
%
%%% =========================================================================
?- sys.echo@("*** FUSING CIA and Ext (country-codes) COUNTRIES ***").
%
% Match by the English name given in the country-codes source.
C1 = C2 :-
   codes/codes:country->C1/codes:name[@codes:language = "english"]/codes:text()->N,
   result/country->C2[@name->N].
%
?- sys.strat.doIt.
% Match by the German name given in the country-codes source.
C1 = C2 :-
   codes/codes:country->C1/codes:name[@codes:language = "german"]/codes:text()->N,
   result/country->C2[@name->N].
%
% Match by the local name given in the country-codes source.
C1 = C2 :-
   codes/codes:country->C1/codes:name[@codes:language = "local"]/codes:text()->N,
   result/country->C2[@name->N].
%
?- sys.strat.doIt.
%
%?- codes/codes:country->C/codes:name[@codes:language = "english"]/codes:text()->N, not result/country->C.
%
%% several non-interesting countries.
%?- cia/cia:country[@cia:rel_country and @cia:name->N and not @codes:car_code].
%
%%% =========================================================================
?- sys.echo@("Merging Qiblih-Cities/CIA-capitals/GlobalStatistics-Main Cities:").
?- sys.echo@("linking qiblih-cities to country objects:").
%/*
%
?- sys.strat.doIt.
% A qiblih city is linked to the result country named in @qiblih:country.
Cty[@country->C] :-
   qiblih/qiblih:city->Cty[@qiblih:country->N],
   result/country->C[@name->N].
%
%?- qiblih/qiblih:city[@qiblih:country->N], not result/country[@name->N].
%
%
?- sys.strat.doIt.
% Qiblih cities still without country or province: link to the CIA-relevant
% country whose @gs:capital has the same name.
Cty[@country->Country] :-
   qiblih/qiblih:city->Cty[@qiblih:name->N],
   not Cty/@country,
   not Cty/@qiblih:province,
   result/country->Country[@cia:rel_country]/@gs:capital[@name->N].
%
% Qiblih cities which have no matching country (5):
%
%?- qiblih/qiblih:city->C[@qiblih:country->N], not C/@country.
%
%
?- sys.strat.doIt.
?- sys.echo@("linking qiblih-cities to their provinces:").
%
Cty[@province->P] :-
   qiblih/qiblih:city->Cty[@country->S and @qiblih:province->N],
   gs//gs:province->P[@gs:country->S and @gs:name->N].
%
%/*
%
?- sys.strat.doIt.
?- sys.echo@("Merging GlobalStatistics Main Cities with qiblih-cities:").
%
% Qiblih cities without a province: equate by name and country.
C1 = C2 :-
   qiblih/qiblih:city->C1[@qiblih:name->N and @country->Ctry and not @qiblih:province],
   result/city->C2[@name->N and @country->Ctry].
%
?- sys.strat.doIt.
%
%
% Qiblih cities not yet carrying @name: equate by name, province and country.
C1 = C2 :-
   qiblih/qiblih:city->C1[@qiblih:name->N and not @name and @country->Ctry and @province->P],
   result/city->C2[@name->N and @province->P and @country->Ctry].
%
?- sys.strat.doIt.
%
% Canadian qiblih cities not yet carrying @name: equate by name and country.
C1 = C2 :-
   qiblih/qiblih:city->C1[@qiblih:name->N and not @name and @country->CDN[@name="Canada"]],
   result/city->C2[@name->N and @country->CDN].
%
?- sys.strat.doIt.
%
%%% =========================================================================
?- sys.echo@("*** Organizations and Memberships ***").
%
%% Compare CIA Org data against CIA Country data:
%% Org-Members which are not countries:
%?- O:organization[member@(X)->>N], not _:country[name@(cia)->N].
%
%% Org-memberships which are not mentioned in Country data (71)
%?- O:organization[member@(X)->>N;abbrev->ON], C:country[name@(cia)->N], not C[membership->>ON].
%
%% Compare CIA Country data against CIA Org data:
%% Organizations which have members but do not exist in Org data: (6)
%?- _:country[membership->>ON], not _:organization[abbrev->ON].
%
%% Country-memberships which are not mentioned in Org data
%?- C:country[membership->>ON;name@(cia)->N], O:organization[abbrev->ON], not O[member@(_)->>N].
%
%
result[organization->O] :-
   orgs/orgs:organization->O.
%
?- sys.strat.doIt.
%
% Full members: one members element per country named under type "member".
O/members[@type->"member" and @country->C] :-
   result/organization->O/orgs:member_names[@orgs:type="member"]/orgs:text()->CN,
   result/country->C[@name->CN].
%
?- sys.strat.doIt.
%
% Any membership type T, but only for countries not listed as "member" of
% the same organization.
O/members[@type->T and @country->C] :-
   result/organization->O/orgs:member_names[@orgs:type->T]/orgs:text()->CN,
   not O/orgs:member_names[@orgs:type="member"]/orgs:text()->CN,
   result/country->C[@name->CN].
%
%
?- sys.strat.doIt.
% Resolve seat city and seat country names to a result city.
O[@seat->Cty] :-
   result/organization->O[@orgs:seatcity->N and @orgs:seatcountry->CN],
   result/city->Cty[@name->N and @country/@name->CN].
%
?- sys.strat.doIt.
%
%?- result/organization[@name->N and seatcity->SN and seatcountry->CN and not @seat].
%
%%% =========================================================================
?- sys.echo@("*** Karlsruhe TERRA ***").
?- sys.echo@("*** Merging Countries ***").
%/*
%
%
% A TERRA country is equated with the result country whose car code equals
% its @terra:code.
L = C :-
   terra/terra:country->L[@terra:code->Code],
   result/country->C[@car_code->Code].
%
?- sys.strat.doIt.
%
%
% TERRA countries still without @car_code: match by German name, ...
L = C :-
   terra/terra:country->L[@terra:name->N and not @car_code],
   result/country->C/codes:name[@codes:language ="german"]/codes:text()->N.
%
% ... by the result country's @name, ...
L = C :-
   terra/terra:country->L[@terra:name->N and not @car_code],
   result/country->C/@name->N.
%
%
% ... or by the name of the capital.
L = C :-
   terra/terra:country->L[@terra:capital->CN and not @car_code],
   result/country->C/@capital/@name->CN.
%
?- sys.strat.doIt.
%
% NOTE(review): check query in the older class-based notation -- confirm
% whether it is meant to be active.
?- C:terra_country, not C.car_code[].
%
%
?- sys.echo@("*** Merging Administrative Divisions ***").
?- sys.echo@("*** by Name ***").
%
% TERRA provinces (entries whose abbreviation differs from the country code)
% are equated with the result province of the same name and country.
L = P :-
   terra/terra:province->L[@terra:country->CC and @terra:abbrev->AB and @terra:name->N],
   not CC = AB,
   result/province->P[@gs:name -> N]/@country[@car_code->CC].
%
?- sys.strat.doIt.
%
%?- terra/terra:province->L[@terra:country->CC and @terra:name->N], not result/province->L.
%
%
?- sys.echo@("*** by capital ***").
%
% Provinces not matched by name: match by capital name, only for country
% codes that occur on more than one TERRA province entry.
L = P :-
   terra/terra:province->L[@terra:country->CC and @terra:capital->CN],
   not result/province->L,
   count{X [CC]; terra/terra:province->X[@terra:country->CC]} > 1,
   result/province->P[@capital[@name->CN] and @country/@car_code->CC].
%
?- sys.strat.doIt.
%
%
% Finland and Norway: provinces and their capitals are taken from TERRA.
p(finland,"SF"), finland = C :-
   result/country->C[@car_code->"SF"].
p(norway,"N"), norway = C :-
   result/country->C[@car_code->"N"].
?- sys.strat.doIt.
%%
result[city->Cap[@name->CN and @country->Ctry and @province->L]],
result[province->L[@country->Ctry and @name->N and @capital->Cap and @population->E]] :-
   p(Ctry,CC),
   terra/terra:province->L[@terra:country->CC and @terra:abbrev->AB and @terra:name->N and @terra:capital->CN and @terra:pop->E],
   terra/terra:city->Cap[@terra:country->CC and terra:province/terra:text()->AB and @terra:name->CN].
%
%
% TERRA "SU" provinces are equated with result countries when the first
% three characters of the TERRA name and of the German country name agree.
% N2 is bound to the text of the codes:name element, as in every other rule
% that reads a codes:name, so that pmatch receives a string.
L = C :-
   terra/terra:province->L[@terra:name->N1 and @terra:country->"SU"],
   result/country->C/codes:name[@codes:language="german"]/codes:text()->N2,
   not terra/terra:country->C,
   pmatch(N1,"/\A(...)/","$1",N1short),
   pmatch(N2,"/\A(...)/","$1",N2short),
   N1short = N2short.
%
?- sys.strat.doIt.
%
% some terra\_provs are not matched (Sweden, and some dead things).
%?- terra/terra:province->L[@terra:country->LID and @terra:abbrev->LTID], not LID = LTID, not result/country->L, not result/province->L.
%
%
?- sys.echo@("*** TERRA Continent Information ***").
%
% One encompassed element per terra:encompassed entry; the continent name is
% resolved to the result continent, the element text is the percentage.
C/encompassed[@continent->CT and @percentage->Perc] :-
   result/country->C/terra:encompassed[@terra:continent->CN]/terra:text()->Perc,
   result/continent->CT[@gs:name->CN].
%
% Russia is entered by hand: 80 for Asia, 20 for Europe.
C/encompassed[@continent->Asia and @percentage->80],
C/encompassed[@continent->Europe and @percentage->20] :-
   result/country->C[@name->"Russia"],
   result/continent->Europe[@gs:name->"Europe"],
   result/continent->Asia[@gs:name->"Asia"].
%
?- sys.strat.doIt.
%
%
% Countries without an encompassed element get 100 for their @gs:continent.
C/encompassed[@continent->Cont and @percentage->100] :-
   result/country->C[@gs:continent->Cont and not encompassed].
%
?- sys.strat.doIt.
%
%
?- sys.echo@("*** Merging TERRA cities and GlobalStatistics cities ***").
%
% TERRA cities whose province text equals the country code: equate by name
% and country.
S = C :-
   terra/terra:city->S[@terra:name->N and @terra:country->CC and terra:province/terra:text()->CC],
   result/country->Country[@terra:code->CC],
   result/city->C[@country->Country and @name->N].
%
%?- sys.echo@("*** Cities by Name/Country/Province ***").
%
% NOTE(review): the last literal only requires that some result province is
% named PName; it is not tied to the city C -- confirm this is intended.
S = C :-
   terra/terra:city->S[@terra:name->N and @terra:country->CC and terra:province/terra:text()->AB],
   not CC = AB,
   result/country->Country[@terra:code->CC],
   terra/terra:province[@terra:name->PName and @terra:country->CC and @terra:abbrev->AB],
   result/city->C[@country->Country and @name->N],
   result/province[@gs:name->PName].
%
?- sys.strat.doIt.
%
?- sys.echo@("*** Cities by Name/Country/TERRA-Province ***").
%
% Result cities without @province: equate by name and country and attach
% the TERRA province.
S = C, C[@province -> P] :-
   terra/terra:city->S[@terra:name->N and @terra:country->CC and terra:province/terra:text()->AB],
   not CC = AB,
   result/country->Country[@terra:code->CC],
   terra/terra:province->P[@terra:country->CC and @terra:abbrev->AB],
   result/city->C[@country->Country and @name->N and not @province].
%
% NOTE(review): sys.break here, sys.strat at the other evaluation points --
% confirm this is deliberate.
?- sys.break.doIt.
%
?- sys.echo@("*** Capitals ***").
%
% Country capitals: a TERRA city not yet in the result is equated with the
% country's @capital when name and country code agree (code "D" excluded).
S = C :-
   result/country[@terra:code->CC and @terra:capital->N and @capital->C],
   not CC="D",
   terra/terra:city->S[@terra:name->N and @terra:country->CC],
   not result/city->S.
%
?- sys.break.doIt.
% Province capitals: same idea, via the TERRA province entry.
S = C :-
   terra/terra:city->S[@terra:name->N and @terra:country->CC and terra:province/terra:text()->AB],
   not result/city->S,
   terra/terra:province[@terra:country->CC and @terra:abbrev->AB and @terra:capital->N and @capital->C].
%
?- sys.break.doIt.
%
% "SU" entries: province capitals, without the not-yet-in-result condition.
S = C :-
   terra/terra:city->S[@terra:name->N and @terra:country->"SU" and terra:province/terra:text()->AB],
   terra/terra:province[@terra:country->"SU" and @terra:abbrev->AB and @terra:capital->N and @capital->C].
%
?- sys.strat.doIt.
%
% "SU" entries: other cities, matched by name against result cities whose
% country carries the matching TERRA abbreviation.
S = C :-
   terra/terra:city->S[@terra:name->N and @terra:country->"SU" and terra:province/terra:text()->AB],
   result/city->C[@country->Country[@terra:country->"SU" and @terra:abbrev->AB] and @name->N].
%
?- sys.strat.doIt.
%
%
% Mark result cities that are neither the capital of their country nor a
% capital listed under the country's gs:adm_divs.
C[@notcapital->true] :-
   result/city->C[@country->X],
   not X[@capital->C],
   not X/gs:adm_divs[@capital->C].
%
?- sys.strat.doIt.
%
%
% Remaining TERRA cities with a province, for countries without a
% gs:main_cities entry marked @notcapital.
result[city->S[@country->C and @province->P]] :-
   result/country->C[@terra:code->CC],
   not C/gs:main_cities[@notcapital],
   terra/terra:city->S[@terra:country->CC and terra:province/terra:text()->AB],
   not CC = AB,
   not result/city->S,
   result/province->P[@country->C and @terra:abbrev->AB].
%
?- sys.strat.doIt.
%
% Provinces that have exactly one result city so far.
result[city->S[@country->C and @province->P]] :-
   result/province->P[@country->C and @terra:abbrev->AB],
   N = count{City [P]; result/city->City[@province->P]},
   N = 1,
   result/country->C[@terra:code->CC],
   terra/terra:city->S[@terra:country->CC and terra:province/terra:text()->AB],
   not CC = AB,
   not result/city->S.
%
%
result[city->S[@country->C]], C[@gs:main_cities->S] :-   %% other countries
   result/country->C[@terra:code->CC],
   not CC = "SU",
   N = count{City [C]; C/@gs:main_cities->City},
   N = 1,
   terra/terra:city->S[@terra:country->CC and terra:province/terra:text()->CC],
   not result/city->S.
%
?- sys.strat.doIt.
%
?- sys.echo@("*** Integrating TERRA longitude/latitude ***").
%
% TERRA coordinates are used only for cities that have neither @latitude
% nor @longitude yet.
C[@longitude->Long and @latitude->Lat] :-
   result/city->C[@terra:longitude->Long and @terra:latitude->Lat],
   not C/@latitude,
   not C/@longitude.
%
?- sys.strat.doIt.
%
% Cities without a name sub-element get name (attribute and sub-element)
% from @terra:name.
C[@name->N and name->N] :-
   result/city->C[@terra:name->N and not name].
?- sys.strat.doIt.
%
?- sys.echo@("*** Integrating TERRA city population ***").
%
% Cities without a population sub-element get one from @terra:population,
% with @year 87.
C/population[@year->87 and text()->E] :-
   result/city->C[@terra:population->E and not population].
%
?- sys.strat.doIt.
%
?- sys.echo@("*** Integrating TERRA car codes ***").
%
C[@car_code->CC] :-
   result/country->C[@terra:code->CC and not @car_code].
%
% Checking non-integrated \TERRA\ data:
%?- terra/terra:province->L[@terra:country->CC and @terra:name->P], not CC = P, not result/country->L, not result/province->L.
%?- L:terra_country, not L:country.
%?- S:terra_city, not S:city.
%%
%
?- sys.echo@("*** Integrating TERRA geo-objects ***").
%
% Class hierarchy of geo objects.
mountain subcl geo_obj.
desert subcl geo_obj.
island subcl geo_obj.
water subcl geo_obj.
lake subcl water.
river subcl water.
sea subcl water.
%
?- sys.strat.doIt.
%
% link geo objects from terra to result:
result[Class->Obj] :-
   terra/Class->Obj,
   Class subcl geo_obj.
%
?- sys.strat.doIt.
%
% Link each geo object to its countries via its terra:located entries.
Obj[@country->C] :-
   terra/Class->Obj/terra:located[@terra:country_code->CC],
   Class subcl geo_obj,
   not CC = "SU",
   result/country->C[@terra:code->CC].
%
% "SU" entries are resolved through the province id.
Obj[@country->C] :-
   terra/Class->Obj/terra:located[@terra:country_code="SU" and @terra:province_id->AB],
   result/country->C[@terra:abbrev->AB].
%
% Country code "CS" is mapped to the country with car code "CZ".  The link
% is the @country attribute, as in the two rules above and as read by later
% rules of the form Obj[... @country->C ...].
Obj[@country->C] :-
   terra/Class->Obj/terra:located[@terra:country_code="CS"],
   result/country->C[@car_code="CZ"].
%
?- sys.strat.doIt.
%
%
% Resolve country and province of each terra:located element itself.
L[@country->C and @province->P] :-
   terra/Class/terra:located->L[@terra:country_code->CC and @terra:province_id->AB],
   result/province->P[@terra:abbrev->AB and @country->C/@terra:code->CC].
%
?- sys.strat.doIt.
%
?- sys.echo@("*** loading add-ons ***").
?- sys.load@("../Mondial-programs/mondial-addprovs.flp").
%
?-sys.strat.doIt.
% Perl snippet handed to perl/3 below: copies its input and transliterates
% the listed accented characters to unaccented ones.
format.name="$out[0]=$in[0]; $out[0]=~tr/áâãäåÉéèíïÍÖóôöúüçñ/aaaaaEeeiiIOooouucn/".
%
?-sys.strat.doIt.
%
% addprov facts with city and province names run through format.name.
% addprov is not defined in this part of the file; presumably it comes from
% the add-on file loaded earlier -- confirm.
addprov2(N2,CC,PN2) :-
   addprov(N,_,CC,PN),
   perl(format.name, N, N2),
   perl(format.name, PN, PN2).
%
?-sys.strat.doIt.
%
% Attach provinces to cities; both must belong to the country with car code CC.
C[@province->P] :-
   addprov2(N,CC,PN),
   result/city->C[@name->N]/@country[@car_code->CC],
   result/province->P[@name->PN]/@country[@car_code->CC].
%
?-sys.strat.doIt.
%
% Additional country links for geo objects.
% NOTE(review): Geo is not used to restrict Class -- confirm.
Obj[@country->C] :-
   addgeocountry(Geo,Name,Code),
   Class subcl geo_obj,
   terra/Class->Obj[@name->Name],
   result/country->C[@car_code->Code].
%
?- sys.strat.doIt.
%
%
% Split the string of an addgeo fact into province names; the pattern
% matches runs of non-';' characters that start with a letter.
addgeoprov(Geo, Name, Code, ProvName) :-
   addgeo(Geo, Name, Code, Strg),
   pmatch(Strg, "/([ÄA-Za-z][^;]*)/g", "$1", ProvName).
%
?- sys.strat.doIt.
%
Obj/located[@country->C and @province->P] :-
   addgeoprov(Geo, Name, Code, ProvName),
   terra/Class->Obj[@name->Name and @country->C[@car_code->Code]],
   result/province->P[@country->C and @name->ProvName].
%
?- sys.strat.doIt.
%
?- sys.echo@("Transpose cia ethnicgroups etc").
%
% CIA stores the group name in @cia:name and the percentage as element text;
% the result gets the name as text and the percentage as attribute.
E[text()->N and @percentage->P] :-
   result/country->C/ethnicgroups->E[@cia:name->N and cia:text()->P].
R[text()->N and @percentage->P] :-
   result/country->C/religions->R[@cia:name->N and cia:text()->P].
L[text()->N and @percentage->P] :-
   result/country->C/languages->L[@cia:name->N and cia:text()->P].
%
?- sys.strat.doIt.
%
% Borders: neighbouring country as @country, element text as @length.
B[@country-> C and @length->L] :-
   result/country/border->B[@cia:country-> C and cia:text()->L].
%
?- sys.strat.doIt.
%
% Nest provinces under countries, and cities under their province or, when
% they have none, directly under their country.
C[province->P] :-
   result/province->P[@country->C].
%
C[city->City] :-
   result/city->City[@country->C and not @province].
%
P[city->City] :-
   result/city->City[@province->P].
%
?- sys.strat.doIt.
%
%
?- sys.echo@("*** Dumping OM ***").
%
%?- sys.theOMAccess.export@("xml","","","result").
?- sys.theOMAccess.export@("xml","mondial-3.0.xml","mondial","result").