๐ฉ๐ป ๋ฐฑ์๋(Back-End)/Node js
HTML์์ Python์ ์ฌ์ฉํ ์ ์๋ PyScript (12)
์ง์ง์ํ์นด
2022. 11. 28. 09:37
728x90
๋ฐ์ํ
<본 블로그는 itadventure 님의 블로그를 참고해서 공부하며 작성하였습니다 :-)>
https://itadventure.tistory.com/554
ํ๋!(13) - ์? ์ธ๊ณต์ง๋ฅ ์ ์ค์จ์ด?! - ํ๊ท ๊ฐ๊ฒฉ ์ถ๊ฐ
'ํ๋'๋ ํ์ด์คํฌ๋ฆฝํธ ๋์ ๊ธฐ์ ์ค์๋ง์ ๋๋ค. ์ง๋ ๊ฒ์๊ธ์์ ์ด์ด์ง๋ ๋ด์ฉ์ ๋๋ค : https://itadventure.tistory.com/553 ํ๋!(12) - ๋ฌด์ ๋ฌ๋? ๋จธ์ ๋ฌ๋! - ๋ฆฌ๋์ด ๋ฆฌ๊ทธ๋ ์ ( LinearRegression ) 'ํ๋'๋
itadventure.tistory.com
๐ ํ๊ท ๊ฐ๊ฒฉ ํฌํจ
평균가격을 포함하기 위해 CSV 파일을 읽고 3개의 컬럼을 받아오도록 변경
AveragePrice 가 제공된 데이터중 평균가격
- 3개의 컬럼
# Read the CSV into a pandas DataFrame.
매출데이터 = pd.read_csv(open_url(
    "http://dreamplan7.cafe24.com/pyscript/csv/avocado.csv"
))
# Keep only the three fields that are needed and rebuild the DataFrame.
매출데이터 = 매출데이터[[
    'Date',
    'Total Volume',
    'AveragePrice'
]]
- ๋ ์ง๋ณ๋ก ( ์ฃผ ๋จ์๋ก ) ๊ทธ๋ฃน์ ์ง์ ๋๋ ๋งค์ถ๋์ ๊ทธ๋ฃน๋จ์๋ก ํฉ์ฐํ์ฌ ํฉ๊ณ
# Per-date total of the sales amount; missing values count as 0.
주간매출_매출량 = 매출데이터.fillna(0) \
    .groupby('날짜', as_index=False)[['매출량']].sum() \
    .sort_values(by='날짜', ascending=True)
# Per-date mean of the average price; missing values count as 0.
주간매출_평균값 = 매출데이터.fillna(0) \
    .groupby('날짜', as_index=False)[['평균가격']].mean() \
    .sort_values(by='날짜', ascending=True)
- 2๊ฐ์ ๋ฐ์ดํฐ ํ๋ ์์ ํ๋๋ก merge (on์ ๊ธฐ์ฌ๋ '๋ ์ง'๋ฅผ ๊ธฐ์ค)
# Join the two per-date aggregates on the date column.
주간매출데이터 = pd.merge(주간매출_매출량, 주간매출_평균값, on='날짜')
# Feature matrix: timestamp, year, month, day, week and average price.
주간매출데이터훈련_넘파이 = 주간매출데이터[['날짜(시간값)', '연도', '월', '일', '주', '평균가격']].to_numpy()
๐ ๋ฐ์ดํฐ ์ค์ผ์ผ๋ง
์ค์ผ์ผํ๋ '๋ฐ์ดํฐ๋ฅผ ์์ ํ'
StandardScaler ์ฌ์ฉ
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the training data only, then apply the same fitted
# transform to both the training and the test data.
스케일러 = StandardScaler()
스케일러.fit(훈련용데이터)
훈련용데이터_스케일 = 스케일러.transform(훈련용데이터)
테스트데이터_스케일 = 스케일러.transform(테스트데이터)
๐ ๋ฐ์ดํฐ ์ค์ฝ์ด
from sklearn.linear_model import LinearRegression
# Fit a linear regression on the scaled training data and its targets.
선형회귀모델 = LinearRegression()
선형회귀모델.fit(훈련용데이터_스케일, 훈련용목표)
- 훈련과정에 대한 척도를 평가 -> score()
# Print score() for the training split and for the test split.
# The two labels read "training model accuracy" and "test model accuracy".
print("훈련용모델 정확도")
print(선형회귀모델.score(훈련용데이터_스케일, 훈련용목표))
print("테스트모델 정확도")
print(선형회귀모델.score(테스트데이터_스케일, 테스트목표))
- 스케일화된 데이터를 바탕으로 예측결과
# Predictions of the fitted model for the scaled training and test data.
훈련용목표예측 = 선형회귀모델.predict(훈련용데이터_스케일)
테스트목표예측 = 선형회귀모델.predict(테스트데이터_스케일)
๐ ์ฝ๋ ๊ตฌํ
- index.html
<html>
<head>
<link rel="stylesheet"
href="https://pyscript.net/alpha/pyscript.css" />
<script defer
src="https://pyscript.net/alpha/pyscript.js"></script>
<py-env>
- pandas
- matplotlib
- seaborn
- scikit-learn
- paths :
- ./common.py
</py-env>
</head>
<body>
<link rel="stylesheet" href="pytable.css"/>
<py-script>
import pandas as pd
from pyodide.http import open_url
from common import *
import numpy as np
from datetime import datetime
<!-- ๊ฒฝ๊ณ ๋ฌธ๊ตฌ ์ ๊ฑฐ -->
import warnings
warnings.filterwarnings( 'ignore' )
# Read the avocado sales CSV into a pandas DataFrame; open_url comes from
# pyodide.http and supplies the file to read_csv.
csv_url = "http://dreamplan7.cafe24.com/pyscript/csv/avocado.csv"
SalesData = pd.read_csv(open_url(csv_url))
# Keep only the three fields used below (source name -> name used from here on).
field_names = {
    'Date': 'Day',
    'Total Volume': 'Amount',
    'AveragePrice': 'AveragePrice',
}
SalesData = SalesData[list(field_names)]
SalesData.columns = list(field_names.values())
# Group the rows by date after replacing missing values with 0.
sales_by_day = SalesData.fillna(0).groupby('Day', as_index=False)
# Total amount per date, ordered by date.
WeekdaysSales_sum = sales_by_day[['Amount']].sum()
WeekdaysSales_sum = WeekdaysSales_sum.sort_values(by='Day', ascending=True)
# Mean of AveragePrice per date, ordered by date.
WeekdaysSales_mean = sales_by_day[['AveragePrice']].mean()
WeekdaysSales_mean = WeekdaysSales_mean.sort_values(by='Day', ascending=True)
# Merge the two aggregates into one DataFrame keyed on 'Day'.
WeekdaysSalesData = pd.merge(
    WeekdaysSales_sum,
    WeekdaysSales_mean,
    on='Day',
)
# Add a numeric timestamp column directly after 'Day'.
# Each 'YYYY-MM-DD' string is parsed into a naive datetime and converted to
# seconds since the epoch. datetime.timestamp() treats a naive value as
# local time, like time.mktime() on its timetuple, and needs no `time`
# import (this script never imports `time`).
# The values are computed in one pass and inserted as a whole column rather
# than written cell by cell through chained `frame[col].loc[i] = ...`
# assignments, which pandas does not guarantee to write back to the frame.
day_timestamps = [
    datetime.strptime(day_text, '%Y-%m-%d').timestamp()
    for day_text in WeekdaysSalesData['Day']
]
WeekdaysSalesData.insert(1, 'Day(timeValue)', day_timestamps, True)
# Add the sales amount divided by 10000 as the column at position 3.
amount_in_10k = WeekdaysSalesData['Amount'] / 10000
WeekdaysSalesData.insert(
    loc=3,
    column='Amount(10000)',
    value=amount_in_10k,
    allow_duplicates=True,
)
# Split every 'YYYY-MM-DD' date into separate training features.
# year, month and day are stored as ints; week is the ISO week number and
# is kept as a string, exactly as the per-row loop stored it.
# The columns are computed first and inserted whole instead of being created
# as '' placeholders and filled through chained `frame[col].loc[i] = ...`
# assignments, which pandas does not guarantee to write back to the frame.
date_parts = [
    tuple(int(part) for part in str(day_text).split('-')[:3])
    for day_text in WeekdaysSalesData['Day']
]
WeekdaysSalesData.insert(4, 'year', [y for y, _, _ in date_parts], True)
WeekdaysSalesData.insert(5, 'month', [m for _, m, _ in date_parts], True)
WeekdaysSalesData.insert(6, 'day', [d for _, _, d in date_parts], True)
WeekdaysSalesData.insert(
    7,
    'week',
    [str(datetime(y, m, d).isocalendar()[1]) for y, m, d in date_parts],
    True,
)
# Append a div with id 'output2' to the page body and write the prepared
# table into it.
output_panel = createElementDiv(document, Element, 'output2')
output_panel.write(WeekdaysSalesData)
from sklearn.model_selection import train_test_split

# Feature matrix: timestamp, calendar parts and average price.
feature_columns = ['Day(timeValue)', 'year', 'month', 'day', 'week', 'AveragePrice']
WeekdaysSalesDataTrain_numpy = WeekdaysSalesData[feature_columns].to_numpy()
# Target vector: the amount in units of 10000. Despite "Test" in its name,
# this array holds the regression target, not a test split.
WeekdaysSalesDataTest_numpy = WeekdaysSalesData['Amount(10000)'].to_numpy()
# shuffle=False keeps the row order when splitting.
split_arrays = train_test_split(
    WeekdaysSalesDataTrain_numpy,
    WeekdaysSalesDataTest_numpy,
    random_state=100,
    shuffle=False,
)
X_train, X_test, y_train, y_test = split_arrays
from sklearn.preprocessing import StandardScaler

# Scale the features. The scaler is fitted on the training rows only and
# the same fitted transform is then applied to both splits.
sclar = StandardScaler().fit(X_train)
X_train_scalr, X_test_scalr = (
    sclar.transform(part) for part in (X_train, X_test)
)
# Linear regression: fitting finds the best-fit line for the scaled
# training features.
from sklearn.linear_model import LinearRegression
lr = LinearRegression()
lr.fit(X_train_scalr, y_train)
# The target is a continuous amount, not a class label, so classification
# accuracy does not apply; score() (R^2 for LinearRegression) is reported
# for each split instead.
# The two labels read "training model accuracy" and "test model accuracy";
# the second one was split across two lines, leaving an unterminated string.
print("훈련용모델 정확도")
print(lr.score(X_train_scalr, y_train))
print("테스트모델 정확도")
print(lr.score(X_test_scalr, y_test))
# Predictions of the fitted model for the scaled training and test features.
y_train_predict = lr.predict(X_train_scalr)
y_test_predict = lr.predict(X_test_scalr)
import matplotlib.pyplot as plt
import matplotlib as mat

# Chart of the actual amounts against the model's fitted and predicted values.
fig = plt.figure(figsize=(15, 7))
# One x tick per row, placed at its timestamp and labelled with the date string.
plt.xticks(
    WeekdaysSalesData['Day(timeValue)'].to_numpy(),
    WeekdaysSalesData['Day'].to_numpy(),
    rotation=90,
)
plt.title('Weekdays Avocado SalesAmount')
# Training rows: actual values, then the model's output for the same rows.
plt.plot(
    X_train[:, 0],
    y_train,
    marker='o',
    color='#c14549',
    label='Original',
)
plt.plot(
    X_train[:, 0],
    y_train_predict,
    marker='d',
    color='blue',
    label='Train pattern',
)
# Test rows: the actual values reuse the 'Original' colour and carry no
# label, so the legend does not get a duplicate entry.
plt.plot(
    X_test[:, 0],
    y_test,
    marker='o',
    color='#c14549',
)
plt.plot(
    X_test[:, 0],
    y_test_predict,
    marker='d',
    color='green',
    label='Predict pattern',
)
plt.xlabel('Day')
# The plotted y values are y_train / y_test, which come from the
# 'Amount(10000)' column, so the y axis carries that name rather than the
# name of the x-axis timestamp column.
plt.ylabel('Amount(10000)')
plt.legend(shadow=True)
ax = plt.gca()
# Grid lines along the x axis only.
ax.xaxis.grid(True)
# Background colour and margins.
ax.set_facecolor('#e8e7d2')
ax.margins(x=0.01, y=0.02)
# Remove the extra whitespace around the plot.
fig.tight_layout()
# Bare expression kept as the last statement of the script; presumably this
# is what makes PyScript display the figure - confirm against the runtime.
fig
</py-script>
</body>
</html>
- common.py
def createElementDiv(document, Element, name):
    """Append a new ``div`` with id *name* to the page body and wrap it.

    Args:
        document: Object providing ``createElement(tag)`` and a ``body``
            with ``append(node)``; the caller passes the page document.
        Element: Callable invoked as ``Element(name)`` to build the return
            value (presumably PyScript's ``Element`` wrapper - confirm).
        name: Value assigned to the new div's ``id`` and passed to
            ``Element``.

    Returns:
        Whatever ``Element(name)`` returns.
    """
    element = document.createElement('div')
    element.id = name
    document.body.append(element)
    return Element(name)
728x90
๋ฐ์ํ