[3] pykan 라이브러리 사용법 : Examples

카테고리 없음 2024. 10. 14. 22:24

Kolmogorov-Arnold Network(KAN)을 가장 쉽게 사용할 수 있는 방법! pykan 라이브러리 사용법에 대해 정리해보고자 한다.

※ 모든 내용은 https://kindxiaoming.github.io/pykan/intro.html 의 내용을 번역한 것으로 모든 저작권은 해당 페이지에 있습니다. 부정확한 표현이 있을 수 있으니 원문 페이지를 꼭 참고해주세요!

Example 1 : Function Fitting

이번 예제에서는 그리드를 세분화하여 KAN의 함수 적합 능력을 극대화해보고자 한다.

from kan import *


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# initialize KAN with G=3
model = KAN(width=[2,1,1], grid=3, k=3, seed=1, device=device)

# create dataset
f = lambda x: torch.exp(torch.sin(torch.pi*x[:,[0]]) + x[:,[1]]**2)
dataset = create_dataset(f, n_var=2, device=device)

model.fit(dataset, opt="LBFGS", steps=20);

>> | train_loss: 1.42e-02 | test_loss: 1.43e-02 | reg: 1.16e+01 |

여기서 좀 더 세밀한 KAN으로 설정해보자.

# initialize a more fine-grained KAN with G=10
model = model.refine(10)
model.fit(dataset, opt="LBFGS", steps=20);

>> | train_loss: 9.52e-04 | test_loss: 9.80e-04 | reg: 1.16e+01 |

loss가 더 좋아진 것을 확인할 수 있다. grid를 더 finer하게 수정해보자.

refine(new_grid) : grid refinement

Args:

new_grid : init
the number of grid intervals after refinement

Returns:

a refined model : MultKAN

Example

>>> from kan import *
>>> device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
>>> model = KAN(width=[2,5,1], grid=5, k=3, seed=0)
>>> print(model.grid)
>>> x = torch.rand(100,2)
>>> model.get_act(x)
>>> model = model.refine(10)
>>> print(model.grid)
checkpoint directory created: ./model
saving model version 0.0
5
saving model version 0.1
10

grids = np.array([3,10,20,50,100])


train_losses = []
test_losses = []
steps = 200
k = 3

for i in range(grids.shape[0]):
    if i == 0:
        model = KAN(width=[2,1,1], grid=grids[i], k=k, seed=1, device=device)
    if i != 0:
        model = model.refine(grids[i])
    results = model.fit(dataset, opt="LBFGS", steps=steps)
    train_losses += results['train_loss']
    test_losses += results['test_loss']

plt.plot(train_losses)
plt.plot(test_losses)
plt.legend(['train', 'test'])
plt.ylabel('RMSE')
plt.xlabel('step')
plt.yscale('log')

n_params = 3 * grids
train_vs_G = train_losses[(steps-1)::steps]
test_vs_G = test_losses[(steps-1)::steps]
plt.plot(n_params, train_vs_G, marker="o")
plt.plot(n_params, test_vs_G, marker="o")
plt.plot(n_params, 100*n_params**(-4.), ls="--", color="black")
plt.xscale('log')
plt.yscale('log')
plt.legend(['train', 'test', r'$N^{-4}$'])
plt.xlabel('number of params')
plt.ylabel('RMSE')

Example 3 : Deep Formulas

Tree-layer KAN

from kan import *

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# create a KAN: 2D inputs, 1D output, and 5 hidden neurons. cubic spline (k=3), 5 grid intervals (grid=5).
model = KAN(width=[4,2,1,1], grid=3, k=3, seed=1, device=device)
f = lambda x: torch.exp((torch.sin(torch.pi*(x[:,[0]]**2+x[:,[1]]**2))+torch.sin(torch.pi*(x[:,[2]]**2+x[:,[3]]**2)))/2)
dataset = create_dataset(f, n_var=4, train_num=3000, device=device)

# train the model
model.fit(dataset, opt="LBFGS", steps=20, lamb=0.002, lamb_entropy=2.);

model = model.prune(edge_th = 1e-2)
model.plot()

grids = [3,5,10,20,50]
#grids = [5]

train_rmse = []
test_rmse = []

for i in range(len(grids)):
    #model = KAN(width=[4,2,1,1], grid=grids[i], k=3, seed=0, device=device).initialize_from_another_model(model, dataset['train_input'])
    model = model.refine(grids[i])
    results = model.fit(dataset, opt="LBFGS", steps=50, stop_grid_update_step=20);
    train_rmse.append(results['train_loss'][-1].item())
    test_rmse.append(results['test_loss'][-1].item())

Two-layer KAN

이 예제에서는 2개의 KAN layer를 쌓는 것이 3개의 KAN layer를 쌓는 것보다 더 성능이 좋지 않다는 것을 보일 것이다.

from kan import KAN, create_dataset
import torch

# create a KAN: 2D inputs, 1D output, and 5 hidden neurons. cubic spline (k=3), 5 grid intervals (grid=5).
model = KAN(width=[4,9,1], grid=3, k=3, seed=0)
f = lambda x: torch.exp((torch.sin(torch.pi*(x[:,[0]]**2+x[:,[1]]**2))+torch.sin(torch.pi*(x[:,[2]]**2+x[:,[3]]**2)))/2)
dataset = create_dataset(f, n_var=4, train_num=3000)

# train the model
model.fit(dataset, opt="LBFGS", steps=20, lamb=0.002, lamb_entropy=2.);
model.plot(beta=10)

grids = [3,5,10,20,50]

train_rmse = []
test_rmse = []

for i in range(len(grids)):
    #model = KAN(width=[4,9,1], grid=grids[i], k=3, seed=0).initialize_from_another_model(model, dataset['train_input'])
    model = model.refine(grids[i])
    results = model.fit(dataset, opt="LBFGS", steps=50, stop_grid_update_step=30);
    train_rmse.append(results['train_loss'][-1].item())
    test_rmse.append(results['test_loss'][-1].item())

import numpy as np
import matplotlib.pyplot as plt

n_params = np.array(grids) * (4*9+9*1)
plt.plot(n_params, train_rmse, marker="o")
plt.plot(n_params, test_rmse, marker="o")
plt.plot(n_params, 300*n_params**(-2.), color="black", ls="--")
plt.legend(['train', 'test', r'$N^{-4}$'], loc="lower left")
plt.xscale('log')
plt.yscale('log')
print(train_rmse)
print(test_rmse)

Example 4 : Classification

Regression formulation

regression problem(output dimension = 1, MSE loss) - 2개의 달 데이터 셋을 생성하자.

from kan import *
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
import numpy as np

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

dataset = {}
train_input, train_label = make_moons(n_samples=1000, shuffle=True, noise=0.1, random_state=None)
test_input, test_label = make_moons(n_samples=1000, shuffle=True, noise=0.1, random_state=None)

dtype = torch.get_default_dtype()
dataset['train_input'] = torch.from_numpy(train_input).type(dtype).to(device)
dataset['test_input'] = torch.from_numpy(test_input).type(dtype).to(device)
dataset['train_label'] = torch.from_numpy(train_label[:,None]).type(dtype).to(device)
dataset['test_label'] = torch.from_numpy(test_label[:,None]).type(dtype).to(device)

X = dataset['train_input']
y = dataset['train_label']
plt.scatter(X[:,0].cpu().detach().numpy(), X[:,1].cpu().detach().numpy(), c=y[:,0].cpu().detach().numpy())

model = KAN(width=[2,1], grid=3, k=3, device=device)

def train_acc():
    return torch.mean((torch.round(model(dataset['train_input'])[:,0]) == dataset['train_label'][:,0]).type(dtype))

def test_acc():
    return torch.mean((torch.round(model(dataset['test_input'])[:,0]) == dataset['test_label'][:,0]).type(dtype))

results = model.fit(dataset, opt="LBFGS", steps=20, metrics=(train_acc, test_acc));
results['train_acc'][-1], results['test_acc'][-1]
>> (1.0, 0.9990000128746033)

lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','tan','abs']
model.auto_symbolic(lib=lib)
formula = model.symbolic_formula()[0][0]
ex_round(formula, 4)

# how accurate is this formula?
def acc(formula, X, y):
    batch = X.shape[0]
    correct = 0
    for i in range(batch):
        correct += np.round(np.array(formula.subs('x_1', X[i,0]).subs('x_2', X[i,1])).astype(np.float64)) == y[i,0]
    return correct/batch

print('train acc of the formula:', acc(formula, dataset['train_input'], dataset['train_label']))
print('test acc of the formula:', acc(formula, dataset['test_input'], dataset['test_label']))
>>
train acc of the formula: tensor(1.)
test acc of the formula: tensor(1.)

Classification formulation

classification problem(output dimension = 2, CrossEntropy loss) - 2개의 달 데이터 셋을 생성하자.

from kan import KAN
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
import torch
import numpy as np

dataset = {}
train_input, train_label = make_moons(n_samples=1000, shuffle=True, noise=0.1, random_state=None)
test_input, test_label = make_moons(n_samples=1000, shuffle=True, noise=0.1, random_state=None)

dataset['train_input'] = torch.from_numpy(train_input).type(dtype).to(device)
dataset['test_input'] = torch.from_numpy(test_input).type(dtype).to(device)
dataset['train_label'] = torch.from_numpy(train_label).type(torch.long).to(device)
dataset['test_label'] = torch.from_numpy(test_label).type(torch.long).to(device)

X = dataset['train_input']
y = dataset['train_label']
plt.scatter(X[:,0].cpu().detach().numpy(), X[:,1].cpu().detach().numpy(), c=y[:].cpu().detach().numpy())

model = KAN(width=[2,2], grid=3, k=3, seed=2024, device=device)

def train_acc():
    return torch.mean((torch.argmax(model(dataset['train_input']), dim=1) == dataset['train_label']).type(dtype))

def test_acc():
    return torch.mean((torch.argmax(model(dataset['test_input']), dim=1) == dataset['test_label']).type(dtype))

results = model.fit(dataset, opt="LBFGS", steps=20, metrics=(train_acc, test_acc), loss_fn=torch.nn.CrossEntropyLoss());

# Automatic symbolic regression
lib = ['x','x^2','x^3','x^4','exp','log','sqrt','tanh','sin','abs']
model.auto_symbolic(lib=lib)
formula1, formula2 = model.symbolic_formula()[0]
ex_round(formula1, 4)

ex_round(formula2, 4)

# how accurate is this formula?
def acc(formula1, formula2, X, y):
    batch = X.shape[0]
    correct = 0
    for i in range(batch):
        logit1 = np.array(formula1.subs('x_1', X[i,0]).subs('x_2', X[i,1])).astype(np.float64)
        logit2 = np.array(formula2.subs('x_1', X[i,0]).subs('x_2', X[i,1])).astype(np.float64)
        correct += (logit2 > logit1) == y[i]
    return correct/batch

print('train acc of the formula:', acc(formula1, formula2, dataset['train_input'], dataset['train_label']))
print('test acc of the formula:', acc(formula1, formula2, dataset['test_input'], dataset['test_label']))
>>
train acc of the formula: tensor(0.8770)
test acc of the formula: tensor(0.8720)

Example 5 : Special functions

special function : $f(x, y) = exp(J_0(20x) + y^2)$, where $J_0(x)$ is the Bessel function

from kan import *

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

# create a KAN: 2D inputs, 1D output, and 5 hidden neurons. cubic spline (k=3), 5 grid intervals (grid=5).
model = KAN(width=[2,1,1], grid=3, k=3, seed=2, device=device)
f = lambda x: torch.exp(torch.special.bessel_j0(20*x[:,[0]]) + x[:,[1]]**2)
dataset = create_dataset(f, n_var=2, device=device)

# train the model
model.fit(dataset, opt="LBFGS", steps=20);
model.plot()

model = model.refine(20)
model.fit(dataset, opt="LBFGS", steps=20);
model.plot()

suggest_symbolic 함수가 기본 SYMBOLIC_LIB에 베셀 함수가 포함되어 있지 않기 때문에 일치하는 결과를 반환하지 않는다. 따라서 베셀 함수를 SYMBOLIC_LIB에 추가하려고 한다.

model.suggest_symbolic(0,0,0)
>>
  function  fitting r2   r2 loss  complexity  complexity loss  total loss
0        0    0.000000  0.000014           0                0    0.000003
1        x    0.001589 -0.002279           1                1    0.799544
2      cos    0.161514 -0.254125           2                2    1.549175
3      sin    0.161514 -0.254125           2                2    1.549175
4    1/x^2    0.099373 -0.150982           2                2    1.569804
('0',
 (<function kan.utils.<lambda>(x)>,
  <function kan.utils.<lambda>(x)>,
  0,
  <function kan.utils.<lambda>(x, y_th)>),
 0.0,
 0)

SYMBOLIC_LIB.keys()
>>
dict_keys(['x', 'x^2', 'x^3', 'x^4', 'x^5', '1/x', '1/x^2', '1/x^3', '1/x^4', '1/x^5', 
'sqrt', 'x^0.5', 'x^1.5', '1/sqrt(x)', '1/x^0.5', 'exp', 'log', 'abs', 'sin', 'cos', 'tan',
'tanh', 'sgn', 'arcsin', 'arccos', 'arctan', 'arctanh', '0', 'gaussian'])

베젤 함수를 SYMBOLIC_LIB에 추가하려면, 해당 함수의 이름, PyTorch 구현, 그리고 복잡도(c값)를 함께 정의해야 한다.

add_symbolic('J0', torch.special.bessel_j0, c=1)

# J0 fitting is not very good
model.suggest_symbolic(0,0,0)
>>
  function  fitting r2   r2 loss  complexity  complexity loss  total loss
0        0    0.000000  0.000014           0                0    0.000003
1       J0    0.198534 -0.319268           1                1    0.736146
2        x    0.001589 -0.002279           1                1    0.799544
3      cos    0.161514 -0.254125           2                2    1.549175
4      sin    0.161514 -0.254125           2                2    1.549175
('0',
 (<function kan.utils.<lambda>(x)>,
  <function kan.utils.<lambda>(x)>,
  0,
  <function kan.utils.<lambda>(x, y_th)>),
 0.0,
 0)

적합된 $R^2$ 값이 여전히 높지 않은 이유는, 실제 함수가 $J_0(20x)$인데 여기서 20이라는 값이 너무 크기 때문이다. 기본 검색 범위가(-10, 10)이므로 20을 포함하기 위해 검색 범위를 더 크게 설정해야 한다. 이제 $J_0$가 목록의 상단에 나타난다.

model.suggest_symbolic(0,0,0,a_range=(-40,40))
>>
  function  fitting r2   r2 loss  complexity  complexity loss  total loss
0       J0    0.998935 -9.862066           1                1   -1.172413
1        0    0.000000  0.000014           0                0    0.000003
2        x    0.001589 -0.002279           1                1    0.799544
3      cos    0.584193 -1.265980           2                2    1.346804
4      sin    0.584193 -1.265980           2                2    1.346804
('J0',
 (<function torch._C._special.special_bessel_j0>,
  J0,
  1,
  <function torch._C._special.special_bessel_j0>),
 0.9989354610443115,
 1)

Example 7 : Solving Partial Differential Equation(PDE) - tutorial과 같은 결과 X

$ \nabla^2 f(x, y) = -2\pi^2 \sin(\pi x) \sin(\pi y)$ with the boundary conditions $ f(-1, y) = f(1, y) = f(x, -1) = f(x, 1) = 0$. The ground truth solution is $ f(x, y) = \sin(\pi x) \sin(\pi y) $

from kan import *
import matplotlib.pyplot as plt
from torch import autograd
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

dim = 2
np_i = 21 # number of interior points (along each dimension)
np_b = 21 # number of boundary points (along each dimension)
ranges = [-1, 1]

model = KAN(width=[2,2,1], grid=5, k=3, seed=1, device=device)

def batch_jacobian(func, x, create_graph=False):
    # x in shape (Batch, Length)
    def _func_sum(x):
        return func(x).sum(dim=0)
    return autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1,0,2)

# define solution
sol_fun = lambda x: torch.sin(torch.pi*x[:,[0]])*torch.sin(torch.pi*x[:,[1]])
source_fun = lambda x: -2*torch.pi**2 * torch.sin(torch.pi*x[:,[0]])*torch.sin(torch.pi*x[:,[1]])

# interior
sampling_mode = 'random' # 'radnom' or 'mesh'

x_mesh = torch.linspace(ranges[0],ranges[1],steps=np_i)
y_mesh = torch.linspace(ranges[0],ranges[1],steps=np_i)
X, Y = torch.meshgrid(x_mesh, y_mesh, indexing="ij")
if sampling_mode == 'mesh':
    #mesh
    x_i = torch.stack([X.reshape(-1,), Y.reshape(-1,)]).permute(1,0)
else:
    #random
    x_i = torch.rand((np_i**2,2))*2-1

x_i = x_i.to(device)

# boundary, 4 sides
helper = lambda X, Y: torch.stack([X.reshape(-1,), Y.reshape(-1,)]).permute(1,0)
xb1 = helper(X[0], Y[0])
xb2 = helper(X[-1], Y[0])
xb3 = helper(X[:,0], Y[:,0])
xb4 = helper(X[:,0], Y[:,-1])
x_b = torch.cat([xb1, xb2, xb3, xb4], dim=0)

x_b = x_b.to(device)

steps = 20
alpha = 0.01
log = 1

def train():
    optimizer = LBFGS(model.parameters(), lr=1, history_size=10, line_search_fn="strong_wolfe", tolerance_grad=1e-32, tolerance_change=1e-32, tolerance_ys=1e-32)

    pbar = tqdm(range(steps), desc='description', ncols=100)

    for _ in pbar:
        def closure():
            global pde_loss, bc_loss
            optimizer.zero_grad()
            # interior loss
            sol = sol_fun(x_i)
            sol_D1_fun = lambda x: batch_jacobian(model, x, create_graph=True)[:,0,:]
            sol_D1 = sol_D1_fun(x_i)
            sol_D2 = batch_jacobian(sol_D1_fun, x_i, create_graph=True)[:,:,:]
            lap = torch.sum(torch.diagonal(sol_D2, dim1=1, dim2=2), dim=1, keepdim=True)
            source = source_fun(x_i)
            pde_loss = torch.mean((lap - source)**2)

            # boundary loss
            bc_true = sol_fun(x_b)
            bc_pred = model(x_b)
            bc_loss = torch.mean((bc_pred-bc_true)**2)

            loss = alpha * pde_loss + bc_loss
            loss.backward()
            return loss

        if _ % 5 == 0 and _ < 50:
            model.update_grid_from_samples(x_i)

        optimizer.step(closure)
        sol = sol_fun(x_i)
        loss = alpha * pde_loss + bc_loss
        l2 = torch.mean((model(x_i) - sol)**2)

        if _ % log == 0:
            pbar.set_description("pde loss: %.2e | bc loss: %.2e | l2: %.2e " % (pde_loss.cpu().detach().numpy(), bc_loss.cpu().detach().numpy(), l2.cpu().detach().numpy()))

train()

1. tqdm : Python에서 사용하는 진행률 표시바 라이브러리

2. torch.meshgrid에서 indexing = "ij"란? meshgrid를 만들 때 행렬 인덱싱 방식을 사용하겠다는 의미로 indexing옵션에는 두 가지가 있다.

(1) "ij" : 수학에서의 인덱스 표기법으로 첫 번째 좌표가 행(row), 두 번째 좌표가 열(column/)

(2) "xy" : 좌표 평면 인덱싱 방식으로 첫 번째 좌표가 x-좌표, 두 번째 좌표가 y-좌표

3. batch_jacobian 함수 : 주어진 함수의 입력에 대한 배치 자코비안 계산

def batch_jacobian(func, x, create_graph=False):
    # x in shape (Batch, Length)
    def _func_sum(x):
        return func(x).sum(dim=0)
    return autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1,0,2)

x : [Batch, Length]

create_graph : 미분된 결과를 바탕으로 다시 미분이 가능하도록 그래프를 생성할지 여부를 결정하는 플래그. True로 설정하면 2차 미분 계산이 가능하다.

permute(1, 0, 2) : [Batch, Output, Input] 형식으로 맞춤

import torch
from torch import autograd

# batch_jacobian 함수 정의
def batch_jacobian(func, x, create_graph=False):
    def _func_sum(x):
        return func(x).sum(dim=0)
    return autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1,0,2)

# 함수 정의: 입력 x의 각 요소를 제곱하여 반환
def example_func(x):
    # x의 각 요소를 제곱하여 반환
    return x ** 2

# 배치 데이터 생성: 3개의 샘플, 각 샘플은 2차원 벡터
x = torch.tensor([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], requires_grad=True)

# batch_jacobian을 사용하여 example_func의 자코비안 계산
jacobian = batch_jacobian(example_func, x)

print("Input x:")
print(x)
print("\nJacobian:")
print(jacobian)
>>
Input x:
tensor([[1., 2.],
        [3., 4.],
        [5., 6.]], requires_grad=True)

Jacobian:
tensor([[[ 2.,  0.],
         [ 0.,  4.]],

        [[ 6.,  0.],
         [ 0.,  8.]],

        [[10.,  0.],
         [ 0., 12.]]])

4. pbar란? tqdm을 통해 생성된 진행률 표시

model.plot(beta=10)

첫 번째 레이어의 활성화 함수는 선형 함수로, 두 번째 레이어의 활성화 함수는 사인 함수로 고정하자.(※하이퍼파라미터에 매우 민감할 수 있음)

model.fix_symbolic(0,0,0,'x')
model.fix_symbolic(0,0,1,'x')
model.fix_symbolic(0,1,0,'x')
model.fix_symbolic(0,1,1,'x')
>>
r2 is 0.9995107650756836
saving model version 0.1
r2 is 0.9994319677352905
saving model version 0.2
r2 is 0.999911367893219
saving model version 0.3
Best value at boundary.
r2 is 0.9999406933784485
saving model version 0.4
tensor(0.9999)

모든 레이어를 심볼릭 함수로 설정한 후, 모델을 추가로 훈련시키자.

train()
formula = model.symbolic_formula()[0][0]
ex_round(formula,6)

from kan import KAN, LBFGS
import torch
import matplotlib.pyplot as plt
from torch import autograd
from tqdm import tqdm

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


dim = 2
np_i = 51 # number of interior points (along each dimension)
np_b = 51 # number of boundary points (along each dimension)
ranges = [-1, 1]


def batch_jacobian(func, x, create_graph=False):
    # x in shape (Batch, Length)
    def _func_sum(x):
        return func(x).sum(dim=0)
    return autograd.functional.jacobian(_func_sum, x, create_graph=create_graph).permute(1,0,2)

# define solution
sol_fun = lambda x: torch.sin(torch.pi*x[:,[0]])*torch.sin(torch.pi*x[:,[1]])
source_fun = lambda x: -2*torch.pi**2 * torch.sin(torch.pi*x[:,[0]])*torch.sin(torch.pi*x[:,[1]])

# interior
sampling_mode = 'mesh' # 'radnom' or 'mesh'

x_mesh = torch.linspace(ranges[0],ranges[1],steps=np_i)
y_mesh = torch.linspace(ranges[0],ranges[1],steps=np_i)
X, Y = torch.meshgrid(x_mesh, y_mesh, indexing="ij")
if sampling_mode == 'mesh':
    #mesh
    x_i = torch.stack([X.reshape(-1,), Y.reshape(-1,)]).permute(1,0)
else:
    #random
    x_i = torch.rand((np_i**2,2))*2-1

x_i = x_i.to(device)

# boundary, 4 sides
helper = lambda X, Y: torch.stack([X.reshape(-1,), Y.reshape(-1,)]).permute(1,0)
xb1 = helper(X[0], Y[0])
xb2 = helper(X[-1], Y[0])
xb3 = helper(X[:,0], Y[:,0])
xb4 = helper(X[:,0], Y[:,-1])
x_b = torch.cat([xb1, xb2, xb3, xb4], dim=0)

x_b = x_b.to(device)

alpha = 0.01
log = 1


grids = [5,10,20]
steps = 50

pde_losses = []
bc_losses = []
l2_losses = []

for grid in grids:
    if grid == grids[0]:
        model = KAN(width=[2,2,1], grid=grid, k=3, seed=1, device=device)
        model = model.speed()
    else:
        model.save_act = True
        model.get_act(x_i)
        model = model.refine(grid)
        model = model.speed()

    def train():
        optimizer = LBFGS(model.parameters(), lr=1, history_size=10, line_search_fn="strong_wolfe", tolerance_grad=1e-32, tolerance_change=1e-32, tolerance_ys=1e-32)

        pbar = tqdm(range(steps), desc='description', ncols=100)

        for _ in pbar:
            def closure():
                global pde_loss, bc_loss
                optimizer.zero_grad()
                # interior loss
                sol = sol_fun(x_i)
                sol_D1_fun = lambda x: batch_jacobian(model, x, create_graph=True)[:,0,:]
                sol_D1 = sol_D1_fun(x_i)
                sol_D2 = batch_jacobian(sol_D1_fun, x_i, create_graph=True)[:,:,:]
                lap = torch.sum(torch.diagonal(sol_D2, dim1=1, dim2=2), dim=1, keepdim=True)
                source = source_fun(x_i)
                pde_loss = torch.mean((lap - source)**2)

                # boundary loss
                bc_true = sol_fun(x_b)
                bc_pred = model(x_b)
                bc_loss = torch.mean((bc_pred-bc_true)**2)

                loss = alpha * pde_loss + bc_loss
                loss.backward()
                return loss

            if _ % 5 == 0 and _ < 20:
                model.update_grid_from_samples(x_i)

            optimizer.step(closure)
            sol = sol_fun(x_i)
            loss = alpha * pde_loss + bc_loss
            l2 = torch.mean((model(x_i) - sol)**2)

            if _ % log == 0:
                pbar.set_description("pde loss: %.2e | bc loss: %.2e | l2: %.2e " % (pde_loss.cpu().detach().numpy(), bc_loss.cpu().detach().numpy(), l2.cpu().detach().numpy()))

            pde_losses.append(pde_loss.cpu().detach().numpy())
            bc_losses.append(bc_loss.cpu().detach().numpy())
            l2_losses.append(l2.cpu().detach().numpy())


    train()

plt.plot(pde_losses, marker='o')
plt.plot(bc_losses, marker='o')
plt.plot(l2_losses, marker='o')
plt.yscale('log')
plt.xlabel('steps')
plt.legend(['PDE loss', 'BC loss', 'L2 squared'])

Example 8 : Continual Learning

우리의 목표는 샘플을 통해 1차원 함수를 학습하는 것이다. 이 1차원 함수는 5개의 가우시간 피크를 가지고 있다. 모든 샘플은 한 번에 신경망에 제공되는 대신, 다섯 단계에 걸쳐 학습을 진행한다. 각 단계에서는 하나의 피크 주변의 샘플만 KAN에 제공된다. 우리는 스플라인의 국소성 덕분에 KAN이 연속 학습을 수행할 수 있음을 발견했다.

저작자표시

ABOUT ME

공부하는 사람의 공부.c 공부하는 사람의 공부.c