*Kaggle - Brazilian E-commerce

Brazilian E-Commerce Public Dataset by Olist


1. 리뷰의 평균 평점이 높은 상위 category 파악

SQL code

SELECT
	t.product_category_name_english,
    AVG(r.review_score) AS avg_score
FROM mystore.products p
LEFT JOIN category_name_translation t ON p.product_category_name = t.product_category_name
LEFT JOIN order_items i ON p.product_id = i.product_id
LEFT JOIN order_reviews r ON i.order_id = r.order_id
GROUP BY t.product_category_name_english
ORDER BY avg_score DESC;
SELECT
	AVG(r.review_score) AS avg_score
FROM mystore.order_reviews r
SELECT
	t.product_category_name_english,
    AVG(r.review_score) AS avg_score
FROM mystore.products p
LEFT JOIN category_name_translation t ON p.product_category_name = t.product_category_name
LEFT JOIN order_items i ON p.product_id = i.product_id
LEFT JOIN order_reviews r ON i.order_id = r.order_id
GROUP BY t.product_category_name_english
HAVING AVG(r.review_score) > "4.0878"
ORDER BY avg_score DESC;

2. 주문율이 높은 시간대 및 제품을 활용한 마케팅 전략 수립

SQL code

SELECT
	product_id,
  TIME(o.order_purchase_timestamp) AS purchase_time
FROM mystore.orders o
LEFT JOIN order_items i ON o.order_id = i.order_id
ORDER BY TIME(o.order_purchase_timestamp) DESC;
SELECT
    DATE_FORMAT(o.order_purchase_timestamp, '%H:00:00') AS hour_range,
    t.product_category_name_english,
    COUNT(DISTINCT o.order_id) AS order_count
FROM mystore.products p
LEFT JOIN category_name_translation t ON p.product_category_name = t.product_category_name
LEFT JOIN order_items i ON p.product_id = i.product_id
LEFT JOIN order_reviews r ON i.order_id = r.order_id
LEFT JOIN orders o ON i.order_id = o.order_id
GROUP BY DATE_FORMAT(o.order_purchase_timestamp, '%H:00:00'), t.product_category_name_english
ORDER BY order_count DESC;
SELECT
    CASE
        WHEN HOUR(o.order_purchase_timestamp) >= 11 AND HOUR(o.order_purchase_timestamp) < 15 THEN '11:00~15:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 15 AND HOUR(o.order_purchase_timestamp) < 19 THEN '15:00~19:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 19 AND HOUR(o.order_purchase_timestamp) < 23 THEN '19:00~23:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 23 OR HOUR(o.order_purchase_timestamp) < 2 THEN '23:00~02:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 2 AND HOUR(o.order_purchase_timestamp) < 7 THEN '02:00~07:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 7 AND HOUR(o.order_purchase_timestamp) < 11 THEN '07:00~11:00'
    END AS hour_range,
    COUNT(DISTINCT o.order_id) AS order_count
FROM mystore.products p
LEFT JOIN category_name_translation t ON p.product_category_name = t.product_category_name
LEFT JOIN order_items i ON p.product_id = i.product_id
LEFT JOIN order_reviews r ON i.order_id = r.order_id
LEFT JOIN orders o ON i.order_id = o.order_id
GROUP BY hour_range
ORDER BY hour_range, order_count DESC;
SELECT
    CASE
        WHEN HOUR(o.order_purchase_timestamp) >= 11 AND HOUR(o.order_purchase_timestamp) < 15 THEN '11:00~15:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 15 AND HOUR(o.order_purchase_timestamp) < 18 THEN '15:00~19:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 19 AND HOUR(o.order_purchase_timestamp) < 23 THEN '19:00~23:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 23 AND HOUR(o.order_purchase_timestamp) < 02 THEN '23:00~02:00'
		WHEN HOUR(o.order_purchase_timestamp) < 2 THEN '23:00~02:00'
        WHEN HOUR(o.order_purchase_timestamp) >= 7 AND HOUR(o.order_purchase_timestamp) < 11 THEN '07:00~11:00'
        ELSE '02:00~07:00'
    END AS hour_range,
    COUNT(DISTINCT o.order_id) AS order_count,
    t.product_category_name_english
FROM mystore.products p
LEFT JOIN category_name_translation t ON p.product_category_name = t.product_category_name
LEFT JOIN order_items i ON p.product_id = i.product_id
LEFT JOIN order_reviews r ON i.order_id = r.order_id
LEFT JOIN orders o ON i.order_id = o.order_id
GROUP BY hour_range
ORDER BY hour_range, order_count DESC;

Python code(matplotlib)

1. 시간대별 총 주문량