FoodMart数据仓库mysql数据初始化
FoodMart简介
FoodMart是一个小型的数据仓库的示例,它基于食品超市的场景。
Mondrian 是一个JAVA写的OLAP引擎.,它从JDBC里读取聚合的数据并缓存在内存里,同时支持MDX查询和提供olap4j、 XML/A 相关API。
注:其它数据库如SQL Server、posgres、Oracle类似(即修改JDBC的URL并配置相应的JDBC驱动)。
下载mondrian-3.3.0
Mondrian download | SourceForge.net
2 点击File 选项后点击mondrian链接进入。
3 找到mondrian-3.3.0.14703后点击下载。
注:也可以选择3.2,当前使用的是3.3。
找到WAR包并解压
在下载的mondrian-3.3.0.14703.zip里找到mondrian.war。
找到需要的JAR包拷贝出来
用解压工具提取处WAR包里的7个jar包并拷贝到指定目录,当前是D:\FoodMart
olap4j.jar
mondrian.jar
log4j-1.2.8.jar
commons-logging-1.0.4.jar
eigenbase-xom.jar
eigenbase-resgen.jar
eigenbase-properties.jar
下载mysql驱动
当前下载的是mysql-connector-java-5.1.19.jar,并放在D:\FoodMart目录内。
拷贝数据文件对应SQL
将之前下载的mondrian-3.3.0.14703.zip里的FoodMartCreateData.sql拷贝到D:\FoodMart目录内。
Mysql里新建库
CREATE DATABASE IF NOT EXISTS foodmartDEFAULT CHARSET utf8;
Windows命令里执行数据加载程序
打开windows命令行,执行如下程序
java -cp D:\FoodMart\mondrian.jar;D:\FoodMart\log4j-1.2.8.jar;D:\FoodMart\commons-logging-1.0.4.jar;D:\FoodMart\eigenbase-xom.jar;D:\FoodMart\eigenbase-resgen.jar;D:\FoodMart\eigenbase-properties.jar;D:\FoodMart\mysql-connector-java-5.1.20-bin.jar;D:\FoodMart\olap4j.jar;D:\FoodMart\mysql-connector-java-5.1.19.jar mondrian.test.loader.MondrianFoodMartLoader -verbose -tables -data -indexes -jdbcDrivers="com.mysql.jdbc.Driver" -inputFile=D:\FoodMart\FoodMartCreateData.sql -outputJdbcURL="jdbc:mysql://localhost:3309/foodmart?user=root&password=root1234
注:数据库名、用户名、密码、端口按需修改。
数据库里查看
-- 查看sales_fact_1998数据
SELECT * FROM sales_fact_1998 limit 10
-- 查看表数量及数据大小
SELECT COUNT(TABLE_NAME) tabcnt,
(sum(DATA_LENGTH)+sum(INDEX_LENGTH))/1024.0/1024 dbsize_M
from information_schema.tables
where table_schema='foodmart';
重度汇总脚本
# Create aggregate tables for Mondrian
##################################################################
## agg_pl_01_sales_fact_1997 done
##################################################################
# physical
# lost "promotion_id" "store_id"
/*
7 fact tables: sales_fact_1997, sales_fact_1998, sales_fact_dec_1998, inventory_fact_1997, inventory_fact_1998, salary, expense_fact
19 dimension tables: product, customer, time_by_day, employee and more
11 aggregate tables
*/
INSERT INTO "agg_pl_01_sales_fact_1997" (
"product_id",
"time_id",
"customer_id",
"store_sales_sum",
"store_cost_sum",
"unit_sales_sum",
"fact_count"
) SELECT
"product_id" AS "product_id",
"time_id" AS "time_id",
"customer_id" AS "customer_id",
SUM("store_sales") AS "store_sales",
SUM("store_cost") AS "store_cost",
SUM("unit_sales") AS "unit_sales",
COUNT(*) AS fact_count
FROM "sales_fact_1997"
GROUP BY "product_id", "time_id", "customer_id";
INSERT INTO "agg_ll_01_sales_fact_1997" (
"product_id",
"time_id",
"customer_id",
"store_sales",
"store_cost",
"unit_sales",
"fact_count"
) SELECT
"product_id" AS "product_id",
"time_id" AS "time_id",
"customer_id" AS "customer_id",
SUM("store_sales") AS "store_sales",
SUM("store_cost") AS "store_cost",
SUM("unit_sales") AS "unit_sales",
COUNT(*) AS "fact_count"
FROM "sales_fact_1997"
GROUP BY "product_id", "time_id", "customer_id";
##################################################################
## agg_l_03_sales_fact_1997 done
##################################################################
# logical
# lost "product_id" "promotion_id" "store_id"
INSERT INTO "agg_l_03_sales_fact_1997" (
"customer_id",
"time_id",
"store_sales",
"store_cost",
"unit_sales",
"fact_count"
) SELECT
"customer_id",
"time_id",
SUM("store_sales") AS "store_sales",
SUM("store_cost") AS "store_cost",
SUM("unit_sales") AS "unit_sales",
COUNT(*) AS "fact_count"
FROM "sales_fact_1997"
GROUP BY "customer_id", "time_id";
##################################################################
## agg_lc_06_sales_fact_1997 done
##################################################################
# collapse "customer_id"
# lost "product_id" "promotion_id" "store_id"
INSERT INTO "agg_lc_06_sales_fact_1997" (
"time_id",
"city",
"state_province",
"country",
"store_sales",
"store_cost",
"unit_sales",
"fact_count"
) SELECT
"B"."time_id",
"D"."city",
"D"."state_province",
"D"."country",
SUM("B"."store_sales") AS "store_sales",
SUM("B"."store_cost") AS "store_cost",
SUM("B"."unit_sales") AS "unit_sales",
COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "customer" "D"
WHERE
"B"."customer_id" = "D"."customer_id"
GROUP BY
"B"."time_id",
"D"."city",
"D"."state_province",
"D"."country";
##################################################################
## agg_l_04_sales_fact_1997 done
##################################################################
# logical
# lost "customer_id" "product_id" "promotion_id" "store_id"
INSERT INTO "agg_l_04_sales_fact_1997" (
"time_id",
"store_sales",
"store_cost",
"unit_sales",
"customer_count",
"fact_count"
) SELECT
"time_id",
SUM("store_sales") AS "store_sales",
SUM("store_cost") AS "store_cost",
SUM("unit_sales") AS "unit_sales",
COUNT(DISTINCT "customer_id") AS "customer_count",
COUNT(*) AS "fact_count"
FROM "sales_fact_1997"
GROUP BY "time_id";
##################################################################
## agg_c_10_sales_fact_1997 done
##################################################################
# collapse "time_id"
# lost "customer_id" "product_id" "promotion_id" "store_id"
INSERT INTO "agg_c_10_sales_fact_1997" (
"month_of_year",
"quarter",
"the_year",
"store_sales",
"store_cost",
"unit_sales",
"customer_count",
"fact_count"
) SELECT
"D"."month_of_year",
"D"."quarter",
"D"."the_year",
SUM("B"."store_sales") AS "store_sales",
SUM("B"."store_cost") AS "store_cost",
SUM("B"."unit_sales") AS "unit_sales",
COUNT(DISTINCT "customer_id") AS "customer_count",
COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
"B"."time_id" = "D"."time_id"
GROUP BY
"D"."month_of_year",
"D"."quarter",
"D"."the_year";
##################################################################
## agg_l_05_sales_fact_1997 done
##################################################################
# logical
# lost "time_id"
INSERT INTO "agg_l_05_sales_fact_1997" (
"product_id",
"customer_id",
"promotion_id",
"store_id",
"store_sales",
"store_cost",
"unit_sales",
"fact_count"
) SELECT
"product_id",
"customer_id",
"promotion_id",
"store_id",
SUM("store_sales") AS "store_sales",
SUM("store_cost") AS "store_cost",
SUM("unit_sales") AS "unit_sales",
COUNT(*) AS fact_count
FROM "sales_fact_1997"
GROUP BY "product_id", "customer_id", "promotion_id", "store_id";
##################################################################
## agg_c_14_sales_fact_1997 done
##################################################################
# collapse "time_id"
INSERT INTO "agg_c_14_sales_fact_1997" (
"product_id",
"customer_id",
"promotion_id",
"store_id",
"month_of_year",
"quarter",
"the_year",
"store_sales",
"store_cost",
"unit_sales",
"fact_count"
) SELECT
"B"."product_id",
"B"."customer_id",
"B"."promotion_id",
"B"."store_id",
"D"."month_of_year",
"D"."quarter",
"D"."the_year",
SUM("B"."store_sales") AS "store_sales",
SUM("B"."store_cost") AS "store_cost",
SUM("B"."unit_sales") AS "unit_sales",
COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
"B"."time_id" = "D"."time_id"
GROUP BY "B"."product_id",
"B"."customer_id",
"B"."promotion_id",
"B"."store_id",
"D"."month_of_year",
"D"."quarter",
"D"."the_year";
##################################################################
## agg_lc_100_sales_fact_1997 done
##################################################################
# drop "promotion_id"
# drop "store_id"
# collapse "time_id"
INSERT INTO "agg_lc_100_sales_fact_1997" (
"product_id",
"customer_id",
"quarter",
"the_year",
"store_sales",
"store_cost",
"unit_sales",
"fact_count"
) SELECT
"B"."product_id",
"B"."customer_id",
"D"."quarter",
"D"."the_year",
SUM("B"."store_sales") AS "store_sales",
SUM("B"."store_cost") AS "store_cost",
SUM("B"."unit_sales") AS "unit_sales",
COUNT(*) AS fact_count
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
"B"."time_id" = "D"."time_id"
GROUP BY "B"."product_id",
"B"."customer_id",
"D"."quarter",
"D"."the_year";
##################################################################
##################################################################
## SPECIAL
##################################################################
##################################################################
## agg_c_special_sales_fact_1997 done
## based upon agg_c_14_sales_fact_1997
##################################################################
# collapse "time_id"
INSERT INTO "agg_c_special_sales_fact_1997" (
"product_id",
"customer_id",
"promotion_id",
"store_id",
"time_month",
"time_quarter",
"time_year",
"store_sales_sum",
"store_cost_sum",
"unit_sales_sum",
"fact_count"
) SELECT
"B"."product_id",
"B"."customer_id",
"B"."promotion_id",
"B"."store_id",
"D"."month_of_year",
"D"."quarter",
"D"."the_year",
SUM("B"."store_sales") AS "store_sales_sum",
SUM("B"."store_cost") AS "store_cost_sum",
SUM("B"."unit_sales") AS "unit_sales_sum",
COUNT(*) AS "fact_count"
FROM "sales_fact_1997" "B", "time_by_day" "D"
WHERE
"B"."time_id" = "D"."time_id"
GROUP BY "B"."product_id",
"B"."customer_id",
"B"."promotion_id",
"B"."store_id",
"D"."month_of_year",
"D"."quarter",
"D"."the_year";
##################################################################
# agg_gender_ms_state_sales_fact_1997
##################################################################
INSERT INTO "agg_g_ms_pcat_sales_fact_1997" (
"gender",
"marital_status",
"product_family",
"product_department",
"product_category",
"month_of_year",
"quarter",
"the_year",
"store_sales",
"store_cost",
"unit_sales",
"customer_count",
"fact_count"
) SELECT
"C"."gender",
"C"."marital_status",
"PC"."product_family",
"PC"."product_department",
"PC"."product_category",
"T"."month_of_year",
"T"."quarter",
"T"."the_year",
SUM("B"."store_sales") AS "store_sales",
SUM("B"."store_cost") AS "store_cost",
SUM("B"."unit_sales") AS "unit_sales",
COUNT(DISTINCT "C"."customer_id") AS "customer_count",
COUNT(*) AS "fact_count"
FROM "sales_fact_1997" "B",
"time_by_day" "T",
"product" "P",
"product_class" "PC",
"customer" "C"
WHERE
"B"."time_id" = "T"."time_id"
AND "B"."customer_id" = "C"."customer_id"
AND "B"."product_id" = "P"."product_id"
AND "P"."product_class_id" = "PC"."product_class_id"
GROUP BY
"C"."gender",
"C"."marital_status",
"PC"."product_family",
"PC"."product_department",
"PC"."product_category",
"T"."month_of_year",
"T"."quarter",
"T"."the_year";
/*
# Above query, rephrased for Access (which does not support
# COUNT(DISTINCT ...) explicitly.
#
#INSERT INTO "agg_g_ms_pcat_sales_fact_1997" (
# "gender",
# "marital_status",
# "product_family",
# "product_department",
# "product_category",
# "month_of_year",
# "quarter",
# "the_year",
# "store_sales",
# "store_cost",
# "unit_sales",
# "customer_count",
# "fact_count"
#) SELECT
# "C"."gender",
# "C"."marital_status",
# "PC"."product_family",
# "PC"."product_department",
# "PC"."product_category",
# "T"."month_of_year",
# "T"."quarter",
# "T"."the_year",
# SUM("B"."store_sales") AS "store_sales",
# SUM("B"."store_cost") AS "store_cost",
# SUM("B"."unit_sales") AS "unit_sales",
# (
# SELECT COUNT("customer_id")
# FROM (
# SELECT DISTINCT
# "DC"."gender",
# "DC"."marital_status",
# "DPC"."product_family",
# "DPC"."product_department",
# "DPC"."product_category",
# "DT"."month_of_year",
# "DT"."quarter",
# "DT"."the_year",
# "DB"."customer_id"
# FROM
# "sales_fact_1997" "DB",
# "time_by_day" "DT",
# "product" "DP",
# "product_class" "DPC",
# "customer" "DC"
# WHERE
# "DB"."time_id" = "DT"."time_id"
# AND "DB"."customer_id" = "DC"."customer_id"
# AND "DB"."product_id" = "DP"."product_id"
# AND "DP"."product_class_id" = "DPC"."product_class_id") AS "CDC"
# WHERE "CDC"."gender" = "C"."gender"
# AND "CDC"."marital_status" = "C"."marital_status"
# AND "CDC"."product_family" = "PC"."product_family"
# AND "CDC"."product_department" = "PC"."product_department"
# AND "CDC"."product_category" = "PC"."product_category"
# AND "CDC"."month_of_year" = "T"."month_of_year"
# AND "CDC"."quarter" = "T"."quarter"
# AND "CDC"."the_year" = "T"."the_year"
# GROUP BY
# "gender",
# "marital_status",
# "product_family",
# "product_department",
# "product_category",
# "month_of_year",
# "quarter",
# "the_year") AS "customer_count",
# COUNT(*) AS "fact_count"
#FROM "sales_fact_1997" "B",
# "time_by_day" "T",
# "product" "P",
# "product_class" "PC",
# "customer" "C"
#WHERE
# "B"."time_id" = "T"."time_id"
#AND "B"."customer_id" = "C"."customer_id"
#AND "B"."product_id" = "P"."product_id"
#AND "P"."product_class_id" = "PC"."product_class_id"
#GROUP BY
# "C"."gender",
# "C"."marital_status",
# "PC"."product_family",
# "PC"."product_department",
# "PC"."product_category",
# "T"."month_of_year",
# "T"."quarter",
# "T"."the_year";
# End insert.sql
*/
彩蛋
可访问已经整理好的数据库建表及数据脚本。
链接:https://pan.baidu.com/s/1d4CikASBHF6qTV9hst-qvQ
提取码:yx8c