class Category(Base):
__tablename__ = 'category'
id = mapped_column(Integer, primary_key=True) # autoincrement.
type = mapped_column(CHAR(3), nullable=False) # C.
caid = mapped_column(CHAR(10), nullable=True) # base36, C0..
s_category = mapped_column(VARCHAR(30), nullable=False)
m_category = mapped_column(VARCHAR(30), nullable=False)
url = mapped_column(VARCHAR(500), nullable=False)
s_topics = mapped_column(JSON, nullable=True)
m_topics = mapped_column(JSON, nullable=True)
__table_args__ = (
UniqueConstraint('type', 'id', name='uq_type_id'),
Index('ix_s_category', 's_category'),
Index('ix_caid', 'caid'),
)
class IP(Base):
__tablename__ = 'ip'
id = mapped_column(Integer, primary_key=True) # autoincrement.
address = mapped_column(VARCHAR(30), nullable=False)
country = mapped_column(VARCHAR(30), nullable=True)
user_agent = mapped_column(VARCHAR(200), nullable=True)
naver = mapped_column(CHAR(10), nullable=False)
timestamp = mapped_column(TIMESTAMP, nullable=False, server_default=text("CURRENT_TIMESTAMP"))
__table_args__ = (
UniqueConstraint('address', name='uq_ip_address'),
)
class Product(Base):
__tablename__ = 'product'
id = mapped_column(Integer, primary_key=True) # autoincrement.
type = mapped_column(CHAR(3), nullable=False) # P.
caid = mapped_column(CHAR(10), nullable=False) # base36, C0..
prid = mapped_column(CHAR(10), nullable=True) # base36, P0..
url = mapped_column(VARCHAR(500), nullable=False)
grade = mapped_column(FLOAT(precision=2, decimal_return_scale=1), nullable=False)
name = mapped_column(VARCHAR(100), nullable=False)
lowest_price = mapped_column(Integer, nullable=False)
review_count = mapped_column(Integer, nullable=False)
match_nv_mid = mapped_column(VARCHAR(30), nullable=False)
# Below mapped_columns are updated when crawlers entered the product link (secondary-crawler).
brand = mapped_column(VARCHAR(20), nullable=True)
maker = mapped_column(VARCHAR(20), nullable=True)
naver_spec = mapped_column(JSON, nullable=True) # with locations
seller_spec = mapped_column(JSON, nullable=True) # with locations
# if seller_spec: crawler get also image_urls.
detail_image_urls = mapped_column(JSON, nullable=True)
__table_args__ = (
UniqueConstraint('type', 'id', name='uq_product_type_id'),
# Index('ix_caid', 'caid'), # caid (내부 검색 key)
Index('ix_match_nv_mid', 'match_nv_mid'), # match_nv_mid (내부 검색 key)
Index('ix_prid', 'prid'), # prid (내부 검색 key)
Index('ix_grade', 'grade'), # grade (정렬 목적)
Index('ix_name', asc(name)), # name (검색 및 정렬 목적)
Index('ix_lowest_price', "lowest_price"), # lowest_price (정렬 목적)
Index('ix_review_count', 'review_count'), # review_count (정렬 목적)
Index('ix_brand', 'brand'), # brand (검색 및 그룹화 목적)
)
class ProductHistory(Base):
__tablename__ = 'product_history'
id = mapped_column(Integer, primary_key=True) # autoincrement.
caid = mapped_column(CHAR(10), nullable=False) # base36, C0..
prid = mapped_column(CHAR(10), nullable=False) # base36, P0..
review_count = mapped_column(Integer, nullable=False)
grade = mapped_column(FLOAT(precision=2, decimal_return_scale=1), nullable=False)
lowest_price = mapped_column(Integer, nullable=False)
timestamp = mapped_column(TIMESTAMP, nullable=False)
__table_args__ = (
UniqueConstraint('prid','timestamp', name='uq_prod_hist_prid_timestamp'),
Index('ix_prid', 'prid'), # prid (aggregate 목적)
Index('ix_caid', 'caid'), # caid (aggregate 목적)
Index('ix_timestamp', desc(timestamp)), # review_count (aggregate 목적)
)
class Review(Base):
__tablename__ = 'review'
id = mapped_column(Integer, primary_key=True) # autoincrement.
type = mapped_column(CHAR(3), nullable=False) # R.
prid = mapped_column(CHAR(10), nullable=False) # base36, P0..
caid = mapped_column(CHAR(10), nullable=False) # base36, C0..
reid = mapped_column(CHAR(10), nullable=True) # base36, R0..
content = mapped_column(VARCHAR(10000), nullable=False)
# our_topics = mapped_column(JSON, nullable=True),
our_topics_yn = mapped_column(CHAR(1), nullable=False)
n_review_id = mapped_column(VARCHAR(100), nullable=False)
quality_score = mapped_column(FLOAT(precision=5, decimal_return_scale=5), nullable=False)
buy_option = mapped_column(VARCHAR(100), nullable=True)
star_score = mapped_column(Integer, nullable=False)
topic_count = mapped_column(Integer, nullable=False)
topic_yn = mapped_column(CHAR(1), nullable=False)
topics = mapped_column(JSON, nullable=True)
user_id = mapped_column(CHAR(20), nullable=False)
aida_modify_time = mapped_column(TIMESTAMP(timezone=True), nullable=False)
mall_id = mapped_column(VARCHAR(30), nullable=False)
mall_seq = mapped_column(VARCHAR(30), nullable=False)
mall_name = mapped_column(VARCHAR(50), nullable=True)
match_nv_mid = mapped_column(VARCHAR(30), nullable=False)
nv_mid = mapped_column(VARCHAR(30), nullable=False)
image_urls = mapped_column(JSON, nullable=False)
__table_args__ = (
UniqueConstraint('type', 'id', name='uq_review_type_id'),
Index('ix_prid', 'prid'), # prid (aggregate 목적)
# Index('ix_caid', 'caid'), # caid (aggregate 목적)
Index('ix_reid', 'reid'), # reid (검색 key 목적)
)
class Topic(Base):
__tablename__ = 'topic'
id = mapped_column(Integer, primary_key=True) # autoincrement.
type = mapped_column(CHAR(3), nullable=False) # RT0 (Review Topic) or IT0 (Image Topic)
tpid = mapped_column(VARCHAR(10), nullable=True) # base36, T0.. # 현재 없어도 됌.
prid = mapped_column(VARCHAR(10), nullable=True) # base36, P0.. join review table
# caid = mapped_column(VARCHAR(10), nullable=False) # base36, C0.. join review table
reid = mapped_column(VARCHAR(10), nullable=True) # base36, R0..
text = mapped_column(VARCHAR(1000), nullable=False) # topic text (Could be deleted)
topic_code = mapped_column(VARCHAR(50), nullable=True) # topic code (english)
topic_name = mapped_column(VARCHAR(20), nullable=False) # topic name (korean)
topic_score = mapped_column(Integer, nullable=True) # topic score
start_pos = mapped_column(Integer, nullable=False)
end_pos = mapped_column(Integer, nullable=False)
positive_yn = mapped_column(CHAR(1), nullable=True) # topic sentiment (Y, N)
sentiment_scale = mapped_column(Integer, nullable=True) # topic sentiment scale
bbox = mapped_column(JSON, nullable=True) # bounding box [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
image_number = mapped_column(Integer, nullable=True) # image number (for ocr topic)
__table_args__ = (
UniqueConstraint('type', 'id', name='uq_topic_type_id'),
# At production level. It should be indexed.
Index('ix_reid', 'reid'), # reid (검색 key 목적)
Index('ix_prid', 'prid'), # prid (검색 key 목적)
Index('ix_topic_code', 'topic_code', 'topic_score'), # topic_code (검색 key와 점수 목적)
Index('ix_sentiment', 'positive_yn', 'sentiment_scale') # sentiment (검색 key와 점수 목적)
)