The Adam optimizer works by continually updating a first-moment and a second-moment estimate of the gradient to adapt the learning rate, and it uses momentum to accelerate training. This lets the update adapt to the direction and scale of each parameter, so the model is optimized more effectively.
https://arxiv.org/pdf/1412.6980.pdf
Parameters. The quantities below are used inside the Adam optimizer to compute the update that is finally applied to the model parameters. The exact update formulas come from the Adam paper linked above.
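For reference, these are the update rules from the paper. For each parameter x with gradient g_t at step t, learning rate α, decay rates β1 and β2, and a small constant ε:

$$
\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1-\beta_1)\,g_t \\
v_t &= \beta_2 v_{t-1} + (1-\beta_2)\,g_t^2 \\
\hat m_t &= \frac{m_t}{1-\beta_1^t}, \qquad \hat v_t = \frac{v_t}{1-\beta_2^t} \\
x_t &= x_{t-1} - \alpha\,\frac{\hat m_t}{\sqrt{\hat v_t}+\epsilon}
\end{aligned}
$$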
x: the parameter values (weights and biases). For the Adam optimizer these are the model's learnable parameters, which the optimizer updates.
g1: the gradient vector. It holds one gradient value per parameter and is used to build the first-moment estimate.
g2: the squared-gradient vector. It holds the square of each parameter's gradient and is used to build the second-moment estimate.
m: the first-moment vector. It accumulates the first-moment estimate per parameter and provides the momentum part of the update.
v: the second-moment vector. It accumulates the second-moment estimate per parameter and provides the per-parameter scaling of the update.
mh: the bias-corrected first moment. Because m and v are initialized to 0, the raw estimates are biased toward 0 during the first iterations; dividing each parameter's first-moment estimate by a decay-dependent correction factor removes this initial bias.
vh: the bias-corrected second moment, the same correction applied to the second-moment estimate.

float * x  = (float *)opt->adam.x->data;  // view of the parameters
float * g1 = (float *)opt->adam.g1->data; // gradient
float * g2 = (float *)opt->adam.g2->data; // gradient squared
float * m  = (float *)opt->adam.m->data;  // first moment
float * v  = (float *)opt->adam.v->data;  // second moment
float * mh = (float *)opt->adam.mh->data; // first moment hat
float * vh = (float *)opt->adam.vh->data; // second moment hat

1. Initialize: Adam allocates two buffers, m and v, holding the first- and second-moment estimates of the gradient. Both have the same number of elements as the model parameters.
2. Compute the gradient: in each training step, compute the gradient at the current parameter values.
3. Update the first-moment estimate: m is updated as an exponential moving average. It is scaled by the decay factor β1 (typically 0.9) and (1 - β1) times the gradient is added.
4. Update the second-moment estimate: v is updated the same way with β2 (typically 0.999) and the squared gradient.
5. Bias correction: because m and v start at 0, they are biased toward 0 early in training. To correct this, m and v are divided by (1 - β1^t) and (1 - β2^t), respectively.
6. Update the parameters: using the corrected moments and the learning rate α, each parameter is decreased by α times the corrected first moment divided by the square root of the corrected second moment (plus a small ε).
7. Repeat steps 2-6 until a stopping condition is reached, e.g. the maximum number of iterations or gradient convergence.
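To make the steps above concrete, here is a minimal standalone sketch of one Adam update over a flat parameter array. It is independent of ggml; the function name adam_step and the toy f(x) = x^2 example are invented for illustration only.

#include <cmath>
#include <cstdio>

// One Adam step over n parameters (illustrative sketch, not ggml code).
void adam_step(int n, float * x, const float * g, float * m, float * v,
               int t, float alpha, float beta1, float beta2, float eps) {
    for (int i = 0; i < n; ++i) {
        m[i] = beta1*m[i] + (1.0f - beta1)*g[i];       // step 3: first moment
        v[i] = beta2*v[i] + (1.0f - beta2)*g[i]*g[i];  // step 4: second moment
        const float mh = m[i]/(1.0f - powf(beta1, t)); // step 5: bias correction
        const float vh = v[i]/(1.0f - powf(beta2, t));
        x[i] -= alpha*mh/(sqrtf(vh) + eps);            // step 6: parameter update
    }
}

int main() {
    // minimize f(x) = x^2, whose gradient is 2x
    float x = 5.0f, m = 0.0f, v = 0.0f;
    for (int t = 1; t <= 100; ++t) {           // step 7: repeat
        const float g = 2.0f*x;                // step 2: compute the gradient
        adam_step(1, &x, &g, &m, &v, t, 0.1f, 0.9f, 0.999f, 1e-8f);
    }
    printf("x after 100 Adam steps: %f\n", x); // should end up close to 0
    return 0;
}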
PS: ps[] below stores the params that need to be optimized.
//
// ADAM
//
//   ref: https://arxiv.org/pdf/1412.6980.pdf
//

static enum ggml_opt_result ggml_opt_adam(
        struct ggml_context * ctx,
        struct ggml_opt_context * opt,
        struct ggml_opt_params params,
        struct ggml_tensor * f,
        struct ggml_cgraph * gf,
        struct ggml_cgraph * gb) {
    GGML_ASSERT(ggml_is_scalar(f));

    // these will store the parameters we want to optimize
    struct ggml_tensor * ps[GGML_MAX_PARAMS];

    int np = 0;
    int nx = 0;
    for (int i = 0; i < gf->n_nodes; ++i) {
        if (gf->nodes[i]->is_param) {
            GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);

            GGML_ASSERT(np < GGML_MAX_PARAMS);

            ps[np++] = gf->nodes[i];
            nx += ggml_nelements(gf->nodes[i]);
        }
    }

    if ((opt->params.type != params.type) || (opt->nx != nx) || (opt->params.past != params.past)) {
        int iter = opt->iter;
        ggml_opt_init(opt->ctx, opt, params, nx);
        opt->iter = iter;
    }

    // constants
    const float sched = params.adam.sched;
    const float decay = params.adam.decay * sched;
    const float alpha = params.adam.alpha * sched;
    const float beta1 = params.adam.beta1;
    const float beta2 = params.adam.beta2;
    const float eps   = params.adam.eps;

    float * x  = opt->adam.x->data;  // view of the parameters
    float * g1 = opt->adam.g1->data; // gradient
    float * g2 = opt->adam.g2->data; // gradient squared
    float * m  = opt->adam.m->data;  // first moment
    float * v  = opt->adam.v->data;  // second moment
    float * mh = opt->adam.mh->data; // first moment hat
    float * vh = opt->adam.vh->data; // second moment hat

    float * pf = params.past > 0 ? opt->adam.pf->data : NULL; // past function values

    // update view
    ggml_opt_get_params(np, ps, x);

    // compute the function value
    ggml_graph_reset (gf);
    ggml_set_f32     (f->grad, 1.0f);

    ggml_graph_compute_with_ctx(ctx, gb, params.n_threads);

    opt->adam.fx_prev = ggml_get_f32_1d(f, 0);
    opt->adam.fx_best = opt->adam.fx_prev;
    if (pf) {
        pf[opt->iter % params.past] = opt->adam.fx_prev;
    }

    // initialize
    if (opt->just_initialized) {
        opt->adam.n_no_improvement = 0;
        opt->just_initialized = false;
    }

    float * fx_best = &opt->adam.fx_best;
    float * fx_prev = &opt->adam.fx_prev;
    int * n_no_improvement = &opt->adam.n_no_improvement;

    int iter0 = opt->iter;

    // run the optimizer
    for (int t = 0; t < params.adam.n_iter; ++t) {
        opt->iter = iter0 + t + 1;
        GGML_PRINT_DEBUG  ("=== iter %d ===\n", t);

        GGML_PRINT_DEBUG  ("f      = %10.6f\n", ggml_get_f32_1d(f, 0));
        GGML_PRINT_DEBUG_5("df/dx0 = %10.6f\n", ggml_get_f32_1d(ps[0]->grad, 0));
        GGML_PRINT_DEBUG_5("df/dx1 = %10.6f\n", ggml_get_f32_1d(ps[1]->grad, 0));

        for (int i = 0; i < np; ++i) {
            GGML_PRINT_DEBUG("param %d: %10.6f, g = %10.6f\n", i,
                    ggml_get_f32_1d(ps[i], 0), ggml_get_f32_1d(ps[i]->grad, 0));
        }

        const int64_t t_start_wall = ggml_time_us();
        const int64_t t_start_cpu  = ggml_cycles();
        UNUSED(t_start_wall);
        UNUSED(t_start_cpu);

        {
            // update the gradient
            ggml_opt_get_grad(np, ps, g1);

            // m_t = beta1*m_t-1 + (1 - beta1)*g_t
            ggml_vec_scale_f32(nx, m, beta1);
            ggml_vec_mad_f32  (nx, m, g1, 1.0f - beta1);

            // g2 = g1^2
            ggml_vec_sqr_f32  (nx, g2, g1);

            // v_t = beta2*v_t-1 + (1 - beta2)*g_t^2
            ggml_vec_scale_f32(nx, v, beta2);
            ggml_vec_mad_f32  (nx, v, g2, 1.0f - beta2);

            // m^hat = m_t / (1 - beta1^t)
            // v^hat = v_t / (1 - beta2^t)
            // x_t = x_t-1 - sched*(alpha*m^hat/(sqrt(v^hat) + eps) + decay*x_t-1)
            // x_t = x_t-1 - sched*alpha*m^hat/(sqrt(v^hat) + eps) - sched*decay*x_t-1
            // x_t = x_t-1*(1-sched*decay) - sched*alpha*m^hat/(sqrt(v^hat) + eps)
            // x_t = x_t-1*(1-sched*decay) + sched*decay*(-alpha/decay)*m^hat/(sqrt(v^hat) + eps)
            // x_t = mix(x_t-1, (-alpha/decay)*m^hat/(sqrt(v^hat) + eps), sched*decay)
            ggml_vec_cpy_f32  (nx, mh, m);
            ggml_vec_cpy_f32  (nx, vh, v);

            ggml_vec_scale_f32(nx, mh, alpha/(1.0f - powf(beta1, opt->iter)));
            ggml_vec_scale_f32(nx, vh,  1.0f/(1.0f - powf(beta2, opt->iter)));

            ggml_vec_sqrt_f32 (nx, vh, vh);
            ggml_vec_acc1_f32 (nx, vh, eps);

            ggml_vec_div_f32  (nx, mh, mh, vh);
            ggml_vec_scale_f32(nx, x,  1.0f - decay);
            ggml_vec_sub_f32  (nx, x,  x,  mh);

            // update the parameters
            ggml_opt_set_params(np, ps, x);
        }

        ggml_graph_reset (gf);
        ggml_set_f32     (f->grad, 1.0f);

        ggml_graph_compute_with_ctx(ctx, gb, params.n_threads);

        const float fx = ggml_get_f32_1d(f, 0);

        // check convergence
        if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
            GGML_PRINT_DEBUG("converged\n");

            return GGML_OPT_OK;
        }

        // delta-based convergence test
        if (pf != NULL) {
            // need at least params.past iterations to start checking for convergence
            if (params.past <= iter0 + t) {
                const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;

                if (fabsf(rate) < params.delta) {
                    return GGML_OPT_OK;
                }
            }

            pf[(iter0 + t)%params.past] = fx;
        }

        // check for improvement
        if (params.max_no_improvement > 0) {
            if (fx_best[0] > fx) {
                fx_best[0] = fx;
                n_no_improvement[0] = 0;
            } else {
                ++n_no_improvement[0];

                if (n_no_improvement[0] >= params.max_no_improvement) {
                    return GGML_OPT_OK;
                }
            }
        }

        fx_prev[0] = fx;

        {
            const int64_t t_end_cpu = ggml_cycles();
            GGML_PRINT_DEBUG("time iter:      %5.3f s\n", ((float)(t_end_cpu - t_start_cpu))/CLOCKS_PER_SEC);
            UNUSED(t_end_cpu);

            const int64_t t_end_wall = ggml_time_us();
            GGML_PRINT_DEBUG("wall time iter: %5.3f s\n", (t_end_wall - t_start_wall)/1e6);
            UNUSED(t_end_wall);
        }
    }

    return GGML_OPT_DID_NOT_CONVERGE;
}
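For context (this is not part of the listing above): user code normally does not call this internal function directly, it reaches it through the public ggml_opt() entry point. A minimal sketch, assuming the same ggml revision as the listings below, where e is a scalar loss tensor built in context ctx and the trainable tensors were registered with ggml_set_param():

// the helper name optimize_with_adam is made up for this example
static enum ggml_opt_result optimize_with_adam(struct ggml_context * ctx, struct ggml_tensor * e) {
    struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM);
    opt_params.adam.n_iter = 100;        // number of Adam iterations for this call
    return ggml_opt(ctx, opt_params, e); // builds the forward/backward graphs and runs the Adam loop
}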
CG: the corresponding source in ggml.c:
https://github1s.com/ggerganov/ggml/blob/HEAD/src/ggml.c#L18414-L18610
CODE

#include "ggml.h"

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <string>
#include <vector>
#include <random>
#include <iostream> // for cout/endl used below

using namespace std;

#define MAX_NARGS 2

#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif

//
// logging
//
#define GGML_DEBUG 0
#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)

float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

int irand(int n) {
    return rand()%n;
}

void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;
    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}

void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;
    for (int i = 0; i < ndims; i++) {
        dims[i] = min + irand(max-min);
    }
}

struct ggml_tensor * get_random_tensor(
        struct ggml_context * ctx0, int ndims, int64_t ne[], float fmin, float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}

float get_element(const struct ggml_tensor * t, int idx) {
    return ((float *)t->data)[idx];
}

void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
}

float run(int val) {
    return 0.1f;
}

struct ggml_init_params params = {
    /* .mem_size   = */ 1024*1024*10, // 10 MB memory pool
    /* .mem_buffer = */ NULL,         // let ggml allocate the buffer
    /* .no_alloc   = */ false,        // allow allocations
};

struct ggml_context * ctx = ggml_init(params); // create a ggml_context

int64_t ne1[4] = {2, 512, 1, 1};
int64_t ne2[4] = {512, 256, 1, 1};
int64_t ne3[4] = {1, 256, 1, 1};
int64_t ne4[4] = {256, 3, 1, 1};
int64_t ne5[4] = {3, 1, 1, 1};

int64_t mid_w[4] = {256, 256, 1, 1};
int64_t mid_b[4] = {256, 1, 1, 1};

struct res {
    ggml_cgraph   ge;
    ggml_tensor * e;
    ggml_tensor * output;
    ggml_tensor * intput;
    ggml_tensor * target;
};

res hf_getcompute_graph() {
    struct ggml_tensor * X = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
    // struct ggml_tensor * X = get_random_tensor(ctx, 2, ne1, -1, 1); // random 2-D input tensor
    struct ggml_tensor * target = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3);

    struct ggml_tensor * proj         = get_random_tensor(ctx, 2, ne1, -1, 1);
    struct ggml_tensor * fc1_weight   = get_random_tensor(ctx, 2, ne2, -1, 1);
    struct ggml_tensor * fc1_bias     = get_random_tensor(ctx, 2, ne3, -1, 1);
    struct ggml_tensor * fcout_weight = get_random_tensor(ctx, 2, ne4, -1, 1);
    struct ggml_tensor * fcout_bias   = get_random_tensor(ctx, 2, ne5, -1, 1);
    struct ggml_tensor * fc3_weight   = get_random_tensor(ctx, 2, mid_w, -1, 1);
    struct ggml_tensor * fc3_bias     = get_random_tensor(ctx, 2, mid_b, -1, 1);
    struct ggml_tensor * fc4_weight   = get_random_tensor(ctx, 2, mid_w, -1, 1);
    struct ggml_tensor * fc4_bias     = get_random_tensor(ctx, 2, mid_b, -1, 1);
    struct ggml_tensor * fc5_weight   = get_random_tensor(ctx, 2, mid_w, -1, 1);
    struct ggml_tensor * fc5_bias     = get_random_tensor(ctx, 2, mid_b, -1, 1);

    ggml_set_param(ctx, fcout_weight); ggml_set_param(ctx, fcout_bias);
    ggml_set_param(ctx, fc3_weight);   ggml_set_param(ctx, fc3_bias);
    ggml_set_param(ctx, fc4_weight);   ggml_set_param(ctx, fc4_bias);
    ggml_set_param(ctx, fc5_weight);   ggml_set_param(ctx, fc5_bias);

    ggml_tensor * proj_of_x = ggml_mul_mat(ctx, proj, X);                                   // 512*1
    ggml_tensor * fc1  = ggml_add(ctx, ggml_mul_mat(ctx, proj_of_x, fc1_weight), fc1_bias); // 1*256
    ggml_tensor * fc1_ = ggml_view_2d(ctx, fc1, 256, 1, 4*256, 0); // ggml_transpose(ctx, fc1); // 256*1
    // ggml_tensor * fc2 = ggml_mul_mat(ctx, fc3_weight, fc1_); // 256*1
    ggml_tensor * fc2 = ggml_mul_mat(ctx, fc1_, fc3_weight);    // 256*1  256*256  1*256
    ggml_tensor * fc3 = ggml_add(ctx, ggml_mul_mat(ctx, fc3_weight, ggml_relu(ctx, fc1_)), fc3_bias);
    ggml_tensor * fc4 = ggml_add(ctx, ggml_mul_mat(ctx, fc4_weight, ggml_relu(ctx, fc3)), fc4_bias);
    ggml_tensor * fc5 = ggml_add(ctx, ggml_mul_mat(ctx, fc5_weight, ggml_relu(ctx, fc4)), fc5_bias);
    ggml_tensor * fcout = ggml_add(ctx, ggml_mul_mat(ctx, fcout_weight, ggml_relu(ctx, fc5)), fcout_bias); // 3*1

    ggml_tensor * fcout_ = ggml_sub(ctx, fcout, target);
    struct ggml_tensor * e = ggml_sum(ctx, ggml_sqr(ctx, fcout_)); // e = sum of squares of (fcout - target)
    struct ggml_cgraph ge = ggml_build_forward(e);                 // build the forward compute graph ge
    return {ge, e, fcout, X, target};
}

void hf_out(ggml_tensor * output) {
    printf("f = %f\n", ggml_get_f32_1d(output, 0));
    // cout << "res " << ((float *)(fc2->data))[0];
    printf("f = %f\n", ggml_get_f32_1d(output, 1));
    printf("f = %f\n", ggml_get_f32_1d(output, 2));
}

void hf_free() {
    ggml_free(ctx); // release the context memory
}

res a = hf_getcompute_graph();

static int *istrain = new int(1); // delete istrain;

void hf_set_data_random() {
    ggml_tensor * input  = a.intput;
    ggml_tensor * target = a.target;

    std::random_device rd;                               // random seed
    std::mt19937 gen(rd());                              // seed the generator
    std::uniform_real_distribution<float> dis(0.0, 1.0); // uniform distribution over [0.0, 1.0)
    std::vector<float> digit(512);
    for (size_t i = 0; i < digit.size(); i++) {
        digit[i] = dis(gen);                             // fill the vector with random values
    }
    memcpy(input->data, digit.data(), ggml_nbytes(input));

    digit = {100.3f, 200.5f, 100.1f};
    memcpy(target->data, digit.data(), ggml_nbytes(target));

    *istrain = 1;
}

void hf_set_data(ggml_tensor * input, ggml_tensor * target, vector<float> v_in, vector<float> v_out) {
    memcpy(input->data, v_in.data(), ggml_nbytes(input));
    memcpy(target->data, v_out.data(), ggml_nbytes(target));
}

namespace hf_adam {
    const string about = "this is a namespace for train only one epoch for adam";

    static struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM); // default Adam optimizer parameters
    static struct ggml_opt_context * opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
    // static
    static struct ggml_tensor * gfbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32,
            sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32) + (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0));
    static struct ggml_tensor * gbbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32,
            sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32) + (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0));

    // build forward + backward compute graphs
    static struct ggml_cgraph * gf = (struct ggml_cgraph *) hf_adam::gfbuf->data;
    static struct ggml_cgraph * gb = (struct ggml_cgraph *) hf_adam::gbbuf->data;

    static int np;
    static int nx;
}

#ifdef __cplusplus
extern "C" {
#endif

static inline bool ggml_is_scalar(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
}

static void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) {
    int i = 0;
    for (int p = 0; p < np; ++p) {
        const int64_t ne = ggml_nelements(ps[p]);
        // TODO: add function to get all elements at once
        for (int64_t j = 0; j < ne; ++j) {
            x[i++] = ggml_get_f32_1d(ps[p], j);
        }
    }
}

static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) {
    int i = 0;
    for (int p = 0; p < np; ++p) {
        const int64_t ne = ggml_nelements(ps[p]);
        // TODO: add function to get all elements at once
        for (int64_t j = 0; j < ne; ++j) {
            g[i++] = ggml_get_f32_1d(ps[p]->grad, j);
        }
    }
}

// floating point type used to accumulate sums
typedef double ggml_float;

inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i] + y[i]; }
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float   v) { for (int i = 0; i < n; ++i) z[i]  = x[i] + v;    }
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i] += x[i];        }
inline static void ggml_vec_acc1_f32(const int n, float * y, const float   v)                  { for (int i = 0; i < n; ++i) y[i] += v;           }
inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i] - y[i]; }
inline static void ggml_vec_set_f32 (const int n, float * x, const float   v)                  { for (int i = 0; i < n; ++i) x[i]  = v;           }
inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = x[i];        }
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = -x[i];       }
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i]*y[i];   }
inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i]/y[i];   }

static void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float * x) {
    int i = 0;
    for (int p = 0; p < np; ++p) {
        const int64_t ne = ggml_nelements(ps[p]);
        // TODO: add function to set tensor from array
        for (int64_t j = 0; j < ne; ++j) {
            ggml_set_f32_1d(ps[p], j, x[i++]);
        }
    }
}

static void ggml_vec_dot_f32(const int n, float * s, const float * x, const float * y) {
#ifdef GGML_SIMD
    float sumf = 0.0f;
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC sum[GGML_F32_ARR] = { GGML_F32_VEC_ZERO };

    GGML_F32_VEC ax[GGML_F32_ARR];
    GGML_F32_VEC ay[GGML_F32_ARR];

    for (int i = 0; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; j++) {
            ax[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);

            sum[j] = GGML_F32_VEC_FMA(sum[j], ax[j], ay[j]);
        }
    }

    // reduce sum0..sum3 to sum0
    GGML_F32_VEC_REDUCE(sumf, sum);

    // leftovers
    for (int i = np; i < n; ++i) {
        sumf += x[i]*y[i];
    }
#else
    // scalar
    ggml_float sumf = 0.0;
    for (int i = 0; i < n; ++i) {
        sumf += (ggml_float)(x[i]*y[i]);
    }
#endif

    *s = sumf;
}

inline static void ggml_vec_mad_f32(const int n, float * y, const float * x, const float v) {
// inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) {
//#if defined(GGML_SIMD)
//    const int np = (n & ~(GGML_F32_STEP - 1));
//
//    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
//
//    GGML_F32_VEC ax[GGML_F32_ARR];
//    GGML_F32_VEC ay[GGML_F32_ARR];
//
//    for (int i = 0; i < np; i += GGML_F32_STEP) {
//        for (int j = 0; j < GGML_F32_ARR; j++) {
//            ax[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
//            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
//            ay[j] = GGML_F32_VEC_FMA(ay[j], ax[j], vx);
//
//            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
//        }
//    }
//
//    // leftovers
//    for (int i = np; i < n; ++i) {
//        y[i] += x[i]*v;
//    }
//#else
    // scalar
    for (int i = 0; i < n; ++i) {
        y[i] += x[i]*v;
    }
//#endif
}

//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
#if defined(GGML_USE_ACCELERATE)
    vDSP_vsmul(y, 1, &v, y, 1, n);
#elif defined(GGML_SIMD)
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);

    GGML_F32_VEC ay[GGML_F32_ARR];

    for (int i = 0; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; j++) {
            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            ay[j] = GGML_F32_VEC_MUL(ay[j], vx);

            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
        }
    }

    // leftovers
    for (int i = np; i < n; ++i) {
        y[i] *= v;
    }
#else
    // scalar
    for (int i = 0; i < n; ++i) {
        y[i] *= v;
    }
#endif
}

inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s);   }
inline static void ggml_vec_sqr_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i];   }
inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
inline static void ggml_vec_log_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]);  }
inline static void ggml_vec_abs_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
inline static void ggml_vec_sgn_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }

//
// ADAM
//
//   ref: https://arxiv.org/pdf/1412.6980.pdf
//
struct ggml_tensor * ps[GGML_MAX_PARAMS];

void ggml_opt_adam(
        struct ggml_context * ctx,
        struct ggml_opt_context * opt,
        struct ggml_opt_params params,
        struct ggml_tensor * f,
        struct ggml_cgraph * gf,
        struct ggml_cgraph * gb) {
    GGML_ASSERT(ggml_is_scalar(f));

    // these will store the parameters we want to optimize
    if ((opt->params.type != params.type) || (opt->nx != hf_adam::nx) || (opt->params.past != params.past)) {
        int iter = opt->iter;
        ggml_opt_init(opt->ctx, opt, params, hf_adam::nx);
        opt->iter = iter;
    }

    // constants
    const float sched = params.adam.sched;
    const float decay = params.adam.decay * sched;
    const float alpha = params.adam.alpha * sched;
    const float beta1 = params.adam.beta1;
    const float beta2 = params.adam.beta2;
    const float eps   = params.adam.eps;

    float * x  = (float *)opt->adam.x->data;  // view of the parameters
    float * g1 = (float *)opt->adam.g1->data; // gradient
    float * g2 = (float *)opt->adam.g2->data; // gradient squared
    float * m  = (float *)opt->adam.m->data;  // first moment
    float * v  = (float *)opt->adam.v->data;  // second moment
    float * mh = (float *)opt->adam.mh->data; // first moment hat
    float * vh = (float *)opt->adam.vh->data; // second moment hat

    float * pf = params.past > 0 ? (float *)opt->adam.pf->data : NULL; // past function values

    // update view
    ggml_opt_get_params(hf_adam::np, ps, x);

    // compute the function value
    ggml_graph_reset (gf);
    ggml_set_f32     (f->grad, 1.0f);

    ggml_graph_compute_with_ctx(ctx, gb, params.n_threads);

    opt->adam.fx_prev = ggml_get_f32_1d(f, 0);
    opt->adam.fx_best = opt->adam.fx_prev;
    if (pf) {
        pf[opt->iter % params.past] = opt->adam.fx_prev;
    }

    // initialize
    if (opt->just_initialized) {
        // opt->adam.n_no_improvement = 0; // *** stack smashing detected ***: terminated
        opt->just_initialized = false;
    }

    float * fx_best = &opt->adam.fx_best;
    float * fx_prev = &opt->adam.fx_prev;
    int * n_no_improvement = &opt->adam.n_no_improvement;

    int iter0 = opt->iter;

    // run the optimizer
    for (int t = 0; t < 20; ++t) {
    // for (int t = 0; t < params.adam.n_iter; ++t) {
        cout << t << endl;
        opt->iter = iter0 + t + 1;
        GGML_PRINT_DEBUG  ("=== iter %d ===\n", t);

        GGML_PRINT_DEBUG  ("f      = %10.6f\n", ggml_get_f32_1d(f, 0));
        GGML_PRINT_DEBUG_5("df/dx0 = %10.6f\n", ggml_get_f32_1d(ps[0]->grad, 0));
        GGML_PRINT_DEBUG_5("df/dx1 = %10.6f\n", ggml_get_f32_1d(ps[1]->grad, 0));

        for (int i = 0; i < hf_adam::np; ++i) {
            GGML_PRINT_DEBUG("param %d: %10.6f, g = %10.6f\n", i,
                    ggml_get_f32_1d(ps[i], 0), ggml_get_f32_1d(ps[i]->grad, 0));
        }

        const int64_t t_start_wall = ggml_time_us();
        const int64_t t_start_cpu  = ggml_cycles();
        // UNUSED(t_start_wall);
        // UNUSED(t_start_cpu);

        {
            // update the gradient
            ggml_opt_get_grad(hf_adam::np, ps, g1); // np: 8, ps: ggml_tensor * [256], g1: per-parameter gradients used for the first-moment estimate

            // m_t = beta1*m_t-1 + (1 - beta1)*g_t
            ggml_vec_scale_f32(hf_adam::nx, m, beta1);
            ggml_vec_mad_f32  (hf_adam::nx, m, g1, 1.0f - beta1);

            // g2 = g1^2
            ggml_vec_sqr_f32  (hf_adam::nx, g2, g1);

            // v_t = beta2*v_t-1 + (1 - beta2)*g_t^2
            ggml_vec_scale_f32(hf_adam::nx, v, beta2);
            ggml_vec_mad_f32  (hf_adam::nx, v, g2, 1.0f - beta2);

            // m^hat = m_t / (1 - beta1^t)
            // v^hat = v_t / (1 - beta2^t)
            // x_t = x_t-1 - sched*(alpha*m^hat/(sqrt(v^hat) + eps) + decay*x_t-1)
            // x_t = x_t-1 - sched*alpha*m^hat/(sqrt(v^hat) + eps) - sched*decay*x_t-1
            // x_t = x_t-1*(1-sched*decay) - sched*alpha*m^hat/(sqrt(v^hat) + eps)
            // x_t = x_t-1*(1-sched*decay) + sched*decay*(-alpha/decay)*m^hat/(sqrt(v^hat) + eps)
            // x_t = mix(x_t-1, (-alpha/decay)*m^hat/(sqrt(v^hat) + eps), sched*decay)
            ggml_vec_cpy_f32  (hf_adam::nx, mh, m);
            ggml_vec_cpy_f32  (hf_adam::nx, vh, v);

            ggml_vec_scale_f32(hf_adam::nx, mh, alpha/(1.0f - powf(beta1, opt->iter)));
            ggml_vec_scale_f32(hf_adam::nx, vh,  1.0f/(1.0f - powf(beta2, opt->iter)));

            ggml_vec_sqrt_f32 (hf_adam::nx, vh, vh);
            ggml_vec_acc1_f32 (hf_adam::nx, vh, eps);

            ggml_vec_div_f32  (hf_adam::nx, mh, mh, vh);
            ggml_vec_scale_f32(hf_adam::nx, x, 1.0f - decay);
            ggml_vec_sub_f32  (hf_adam::nx, x, x, mh);

            // update the parameters
            ggml_opt_set_params(hf_adam::np, ps, x);
        }

        ggml_graph_reset (gf);
        ggml_set_f32     (f->grad, 1.0f);

        ggml_graph_compute_with_ctx(ctx, gb, params.n_threads);

        const float fx = ggml_get_f32_1d(f, 0);

        // check convergence
        if (fabsf(fx - fx_prev[0])/fx < params.adam.eps_f) {
            GGML_PRINT_DEBUG("converged\n");
            return;
        }

        // delta-based convergence test
        if (pf != NULL) {
            // need at least params.past iterations to start checking for convergence
            if (params.past <= iter0 + t) {
                const float rate = (pf[(iter0 + t)%params.past] - fx)/fx;

                if (fabsf(rate) < params.delta) {
                    return;
                }
            }

            pf[(iter0 + t)%params.past] = fx;
        }

        fx_prev[0] = fx;

        {
            const int64_t t_end_cpu = ggml_cycles();
            GGML_PRINT_DEBUG("time iter:      %5.3f s\n", ((float)(t_end_cpu - t_start_cpu))/CLOCKS_PER_SEC);
            // UNUSED(t_end_cpu);

            const int64_t t_end_wall = ggml_time_us();
            GGML_PRINT_DEBUG("wall time iter: %5.3f s\n", (t_end_wall - t_start_wall)/1e6);
            // UNUSED(t_end_wall);
        }
    }

    return;
}

struct rgb { float r; float g; float b; };

void hf_play_init() {
    ggml_opt_init(ctx, hf_adam::opt, hf_adam::opt_params, 0);

    *hf_adam::gf = ggml_build_forward (a.e);
    *hf_adam::gb = ggml_build_backward(ctx, hf_adam::gf, true);

    hf_adam::np = 0;
    hf_adam::nx = 0;
    for (int i = 0; i < hf_adam::gf->n_nodes; ++i) {
        if (hf_adam::gf->nodes[i]->is_param) {
            GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
            GGML_ASSERT(hf_adam::np < GGML_MAX_PARAMS);
            ps[hf_adam::np++] = hf_adam::gf->nodes[i];
            hf_adam::nx += ggml_nelements(hf_adam::gf->nodes[i]);
        }
    }
}

static bool flag = true;

void hf_play_start_train() {
    ggml_cgraph   ge     = a.ge;
    ggml_tensor * e      = a.e;
    ggml_tensor * output = a.output;
    ggml_tensor * input  = a.intput;
    ggml_tensor * target = a.target;

    if (flag) {
        cout << "------init------" << endl;
        hf_play_init();
        flag = false;
    }

    while (1) {
        if (*istrain) {
            // ggml_opt(ctx, opt_params, e); // optimize tensor e with the given optimizer params
            cout << "pppppppppppppppppppppppppppppppppppppppppppppppppp" << endl;
            cout << hf_adam::about << endl;
            ggml_opt_adam(ctx, hf_adam::opt, hf_adam::opt->params, a.e, hf_adam::gf, hf_adam::gb);
            // ggml_opt_resume_g(ctx, hf_adam::opt, a.e, hf_adam::gf, hf_adam::gb);
            // ggml_opt_adam(ctx, hf_adam::opt, hf_adam::opt->params, a.e, hf_adam::gf, hf_adam::gb);
            hf_set_data_random();
            *istrain = 0;
        } else {
            cout << "oooooooooooooooooooooooooooooooooooooooooooooooo" << endl;
            ggml_graph_reset(&a.ge);                                  // reset the compute graph
            ggml_graph_compute_with_ctx(ctx, &a.ge, /*n_threads*/ 1); // run the forward graph with this context
            float fe = ggml_get_f32_1d(e, 0);                         // read the first element of e
            printf("%s: e = %.4f\n", __func__, fe);                   // print the loss value
            hf_out(output);
            for (int i = 0; i < 10000000; ++i);                       // crude delay
            *istrain = 1;
        }
    }
    return;
}

#ifdef __cplusplus
}
#endif

int main(void) {
    hf_play_start_train();
    return 0;
}

A revised version of the same program, with the optimizer state moved into a struct with static members and a parameterless ggml_opt_adam():

#include "ggml.h"

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cassert>
#include <cstring>
#include <string>
#include <vector>
#include <random>
#include <iostream> // for cout/endl used below

using namespace std;

#define MAX_NARGS 2

#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wdouble-promotion"
#endif

//
// logging
//
#define GGML_DEBUG 0
#if (GGML_DEBUG >= 1)
#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG(...)
#endif

#if (GGML_DEBUG >= 5)
#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_5(...)
#endif

#if (GGML_DEBUG >= 10)
#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
#else
#define GGML_PRINT_DEBUG_10(...)
#endif

#define GGML_PRINT(...) printf(__VA_ARGS__)

float frand(void) {
    return (float)rand()/(float)RAND_MAX;
}

int irand(int n) {
    return rand()%n;
}

void get_random_dims(int64_t * dims, int ndims) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;
    for (int i = 0; i < ndims; i++) {
        dims[i] = 1 + irand(4);
    }
}

void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
    dims[0] = dims[1] = dims[2] = dims[3] = 1;
    for (int i = 0; i < ndims; i++) {
        dims[i] = min + irand(max-min);
    }
}

struct ggml_tensor * get_random_tensor(
        struct ggml_context * ctx0, int ndims, int64_t ne[], float fmin, float fmax) {
    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);

    switch (ndims) {
        case 1:
            for (int i0 = 0; i0 < ne[0]; i0++) {
                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
            }
            break;
        case 2:
            for (int i1 = 0; i1 < ne[1]; i1++) {
                for (int i0 = 0; i0 < ne[0]; i0++) {
                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                }
            }
            break;
        case 3:
            for (int i2 = 0; i2 < ne[2]; i2++) {
                for (int i1 = 0; i1 < ne[1]; i1++) {
                    for (int i0 = 0; i0 < ne[0]; i0++) {
                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                    }
                }
            }
            break;
        case 4:
            for (int i3 = 0; i3 < ne[3]; i3++) {
                for (int i2 = 0; i2 < ne[2]; i2++) {
                    for (int i1 = 0; i1 < ne[1]; i1++) {
                        for (int i0 = 0; i0 < ne[0]; i0++) {
                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
                        }
                    }
                }
            }
            break;
        default:
            assert(false);
    }

    return result;
}

float get_element(const struct ggml_tensor * t, int idx) {
    return ((float *)t->data)[idx];
}

void set_element(struct ggml_tensor * t, int idx, float value) {
    ((float *)t->data)[idx] = value;
}

float run(int val) {
    return 0.1f;
}

struct ggml_init_params params = {
    /* .mem_size   = */ 1024*1024*10, // 10 MB memory pool
    /* .mem_buffer = */ NULL,         // let ggml allocate the buffer
    /* .no_alloc   = */ false,        // allow allocations
};

struct ggml_context * ctx = ggml_init(params); // create a ggml_context

int64_t ne1[4] = {2, 512, 1, 1};
int64_t ne2[4] = {512, 256, 1, 1};
int64_t ne3[4] = {1, 256, 1, 1};
int64_t ne4[4] = {256, 3, 1, 1};
int64_t ne5[4] = {3, 1, 1, 1};

int64_t mid_w[4] = {256, 256, 1, 1};
int64_t mid_b[4] = {256, 1, 1, 1};

struct res {
    ggml_cgraph   ge;
    ggml_tensor * e;
    ggml_tensor * output;
    ggml_tensor * intput;
    ggml_tensor * target;
};

res hf_getcompute_graph() {
    struct ggml_tensor * X = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 2);
    // struct ggml_tensor * X = get_random_tensor(ctx, 2, ne1, -1, 1); // random 2-D input tensor
    struct ggml_tensor * target = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 3);

    struct ggml_tensor * proj         = get_random_tensor(ctx, 2, ne1, -1, 1);
    struct ggml_tensor * fc1_weight   = get_random_tensor(ctx, 2, ne2, -1, 1);
    struct ggml_tensor * fc1_bias     = get_random_tensor(ctx, 2, ne3, -1, 1);
    struct ggml_tensor * fcout_weight = get_random_tensor(ctx, 2, ne4, -1, 1);
    struct ggml_tensor * fcout_bias   = get_random_tensor(ctx, 2, ne5, -1, 1);
    struct ggml_tensor * fc3_weight   = get_random_tensor(ctx, 2, mid_w, -1, 1);
    struct ggml_tensor * fc3_bias     = get_random_tensor(ctx, 2, mid_b, -1, 1);
    struct ggml_tensor * fc4_weight   = get_random_tensor(ctx, 2, mid_w, -1, 1);
    struct ggml_tensor * fc4_bias     = get_random_tensor(ctx, 2, mid_b, -1, 1);
    struct ggml_tensor * fc5_weight   = get_random_tensor(ctx, 2, mid_w, -1, 1);
    struct ggml_tensor * fc5_bias     = get_random_tensor(ctx, 2, mid_b, -1, 1);

    ggml_set_param(ctx, fcout_weight); ggml_set_param(ctx, fcout_bias);
    ggml_set_param(ctx, fc3_weight);   ggml_set_param(ctx, fc3_bias);
    ggml_set_param(ctx, fc4_weight);   ggml_set_param(ctx, fc4_bias);
    ggml_set_param(ctx, fc5_weight);   ggml_set_param(ctx, fc5_bias);

    ggml_tensor * proj_of_x = ggml_mul_mat(ctx, proj, X);                                   // 512*1
    ggml_tensor * fc1  = ggml_add(ctx, ggml_mul_mat(ctx, proj_of_x, fc1_weight), fc1_bias); // 1*256
    ggml_tensor * fc1_ = ggml_view_2d(ctx, fc1, 256, 1, 4*256, 0); // ggml_transpose(ctx, fc1); // 256*1
    // ggml_tensor * fc2 = ggml_mul_mat(ctx, fc3_weight, fc1_); // 256*1
    ggml_tensor * fc2 = ggml_mul_mat(ctx, fc1_, fc3_weight);    // 256*1  256*256  1*256
    ggml_tensor * fc3 = ggml_add(ctx, ggml_mul_mat(ctx, fc3_weight, ggml_relu(ctx, fc1_)), fc3_bias);
    ggml_tensor * fc4 = ggml_add(ctx, ggml_mul_mat(ctx, fc4_weight, ggml_relu(ctx, fc3)), fc4_bias);
    ggml_tensor * fc5 = ggml_add(ctx, ggml_mul_mat(ctx, fc5_weight, ggml_relu(ctx, fc4)), fc5_bias);
    ggml_tensor * fcout = ggml_add(ctx, ggml_mul_mat(ctx, fcout_weight, ggml_relu(ctx, fc5)), fcout_bias); // 3*1

    ggml_tensor * fcout_ = ggml_sub(ctx, fcout, target);
    struct ggml_tensor * e = ggml_sum(ctx, ggml_sqr(ctx, fcout_)); // e = sum of squares of (fcout - target)
    struct ggml_cgraph ge = ggml_build_forward(e);                 // build the forward compute graph ge
    return {ge, e, fcout, X, target};
}

void hf_out(ggml_tensor * output) {
    printf("f = %f\n", ggml_get_f32_1d(output, 0));
    // cout << "res " << ((float *)(fc2->data))[0];
    printf("f = %f\n", ggml_get_f32_1d(output, 1));
    printf("f = %f\n", ggml_get_f32_1d(output, 2));
}

void hf_free() {
    ggml_free(ctx); // release the context memory
}

static res a = hf_getcompute_graph();

struct ggml_tensor * ps[GGML_MAX_PARAMS];

static int *istrain = new int(1); // delete istrain;

void hf_set_data_random() {
    ggml_tensor * input  = a.intput;
    ggml_tensor * target = a.target;

    std::vector<float> digit = {1.f, 2.f};
    memcpy(input->data, digit.data(), ggml_nbytes(input));

    digit = {100.3f, 200.5f, 100.1f};
    memcpy(target->data, digit.data(), ggml_nbytes(target));

    *istrain = 1;
}

void hf_set_data(ggml_tensor * input, ggml_tensor * target, vector<float> v_in, vector<float> v_out) {
    memcpy(input->data, v_in.data(), ggml_nbytes(input));
    memcpy(target->data, v_out.data(), ggml_nbytes(target));
}

static inline bool ggml_is_scalar(const struct ggml_tensor * tensor) {
    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
    return tensor->ne[0] == 1 && tensor->ne[1] == 1 && tensor->ne[2] == 1 && tensor->ne[3] == 1;
}

static void ggml_opt_get_params(int np, struct ggml_tensor * const ps[], float * x) {
    int i = 0;
    for (int p = 0; p < np; ++p) {
        const int64_t ne = ggml_nelements(ps[p]);
        // TODO: add function to get all elements at once
        for (int64_t j = 0; j < ne; ++j) {
            x[i++] = ggml_get_f32_1d(ps[p], j);
        }
    }
}

static void ggml_opt_get_grad(int np, struct ggml_tensor * const ps[], float * g) {
    int i = 0;
    for (int p = 0; p < np; ++p) {
        const int64_t ne = ggml_nelements(ps[p]);
        // TODO: add function to get all elements at once
        for (int64_t j = 0; j < ne; ++j) {
            g[i++] = ggml_get_f32_1d(ps[p]->grad, j);
        }
    }
}

// floating point type used to accumulate sums
typedef double ggml_float;

inline static void ggml_vec_add_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i] + y[i]; }
inline static void ggml_vec_add1_f32(const int n, float * z, const float * x, const float   v) { for (int i = 0; i < n; ++i) z[i]  = x[i] + v;    }
inline static void ggml_vec_acc_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i] += x[i];        }
inline static void ggml_vec_acc1_f32(const int n, float * y, const float   v)                  { for (int i = 0; i < n; ++i) y[i] += v;           }
inline static void ggml_vec_sub_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i] - y[i]; }
inline static void ggml_vec_set_f32 (const int n, float * x, const float   v)                  { for (int i = 0; i < n; ++i) x[i]  = v;           }
inline static void ggml_vec_cpy_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = x[i];        }
inline static void ggml_vec_neg_f32 (const int n, float * y, const float * x)                  { for (int i = 0; i < n; ++i) y[i]  = -x[i];       }
inline static void ggml_vec_mul_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i]*y[i];   }
inline static void ggml_vec_div_f32 (const int n, float * z, const float * x, const float * y) { for (int i = 0; i < n; ++i) z[i]  = x[i]/y[i];   }

static void ggml_opt_set_params(int np, struct ggml_tensor * const ps[], const float * x) {
    int i = 0;
    for (int p = 0; p < np; ++p) {
        const int64_t ne = ggml_nelements(ps[p]);
        // TODO: add function to set tensor from array
        for (int64_t j = 0; j < ne; ++j) {
            ggml_set_f32_1d(ps[p], j, x[i++]);
        }
    }
}

static void ggml_vec_dot_f32(const int n, float * s, const float * x, const float * y) {
#ifdef GGML_SIMD
    float sumf = 0.0f;
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC sum[GGML_F32_ARR] = { GGML_F32_VEC_ZERO };

    GGML_F32_VEC ax[GGML_F32_ARR];
    GGML_F32_VEC ay[GGML_F32_ARR];

    for (int i = 0; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; j++) {
            ax[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);

            sum[j] = GGML_F32_VEC_FMA(sum[j], ax[j], ay[j]);
        }
    }

    // reduce sum0..sum3 to sum0
    GGML_F32_VEC_REDUCE(sumf, sum);

    // leftovers
    for (int i = np; i < n; ++i) {
        sumf += x[i]*y[i];
    }
#else
    // scalar
    ggml_float sumf = 0.0;
    for (int i = 0; i < n; ++i) {
        sumf += (ggml_float)(x[i]*y[i]);
    }
#endif

    *s = sumf;
}

inline static void ggml_vec_mad_f32(const int n, float * y, const float * x, const float v) {
// inline static void ggml_vec_mad_f32(const int n, float * restrict y, const float * restrict x, const float v) {
//#if defined(GGML_SIMD)
//    const int np = (n & ~(GGML_F32_STEP - 1));
//
//    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);
//
//    GGML_F32_VEC ax[GGML_F32_ARR];
//    GGML_F32_VEC ay[GGML_F32_ARR];
//
//    for (int i = 0; i < np; i += GGML_F32_STEP) {
//        for (int j = 0; j < GGML_F32_ARR; j++) {
//            ax[j] = GGML_F32_VEC_LOAD(x + i + j*GGML_F32_EPR);
//            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
//            ay[j] = GGML_F32_VEC_FMA(ay[j], ax[j], vx);
//
//            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
//        }
//    }
//
//    // leftovers
//    for (int i = np; i < n; ++i) {
//        y[i] += x[i]*v;
//    }
//#else
    // scalar
    for (int i = 0; i < n; ++i) {
        y[i] += x[i]*v;
    }
//#endif
}

//inline static void ggml_vec_scale_f32(const int n, float * y, const float v) { for (int i = 0; i < n; ++i) y[i] *= v; }
inline static void ggml_vec_scale_f32(const int n, float * y, const float v) {
#if defined(GGML_USE_ACCELERATE)
    vDSP_vsmul(y, 1, &v, y, 1, n);
#elif defined(GGML_SIMD)
    const int np = (n & ~(GGML_F32_STEP - 1));

    GGML_F32_VEC vx = GGML_F32_VEC_SET1(v);

    GGML_F32_VEC ay[GGML_F32_ARR];

    for (int i = 0; i < np; i += GGML_F32_STEP) {
        for (int j = 0; j < GGML_F32_ARR; j++) {
            ay[j] = GGML_F32_VEC_LOAD(y + i + j*GGML_F32_EPR);
            ay[j] = GGML_F32_VEC_MUL(ay[j], vx);

            GGML_F32_VEC_STORE(y + i + j*GGML_F32_EPR, ay[j]);
        }
    }

    // leftovers
    for (int i = np; i < n; ++i) {
        y[i] *= v;
    }
#else
    // scalar
    for (int i = 0; i < n; ++i) {
        y[i] *= v;
    }
#endif
}

inline static void ggml_vec_norm_f32 (const int n, float * s, const float * x) { ggml_vec_dot_f32(n, s, x, x); *s = sqrtf(*s);   }
inline static void ggml_vec_sqr_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = x[i]*x[i];   }
inline static void ggml_vec_sqrt_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = sqrtf(x[i]); }
inline static void ggml_vec_log_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = logf(x[i]);  }
inline static void ggml_vec_abs_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
inline static void ggml_vec_sgn_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
inline static void ggml_vec_elu_f32  (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }

struct hf_adam {
    hf_adam() {
        ggml_opt_init(ctx, hf_adam::opt, hf_adam::opt_params, 0);
        cout << "------init------" << endl;

        // hf_play_init();
        *hf_adam::gf = ggml_build_forward (a.e);
        *hf_adam::gb = ggml_build_backward(ctx, hf_adam::gf, true);

        hf_adam::np = 0;
        hf_adam::nx = 0;
        for (int i = 0; i < hf_adam::gf->n_nodes; ++i) {
            if (hf_adam::gf->nodes[i]->is_param) {
                GGML_PRINT_DEBUG("found param %d: grad->op = %d\n", np, gf->nodes[i]->grad->op);
                GGML_ASSERT(hf_adam::np < GGML_MAX_PARAMS);
                ps[hf_adam::np++] = hf_adam::gf->nodes[i];
                hf_adam::nx += ggml_nelements(hf_adam::gf->nodes[i]);
            }
        }
        // ----------- hf_play_init(); ------ end

        GGML_ASSERT(ggml_is_scalar(a.e));

        // if ((opt->params.type != hf_adam::opt->params.type) || (opt->nx != hf_adam::nx) || (opt->params.past != hf_adam::opt->params.past)) {
        if ((false) || (opt->nx != hf_adam::nx) || (opt->params.past != hf_adam::opt->params.past)) {
            int iter = opt->iter;
            ggml_opt_init(opt->ctx, opt, hf_adam::opt->params, hf_adam::nx);
            opt->iter = iter;
        }

        // constants
        sched = opt->params.adam.sched;
        decay = opt->params.adam.decay * sched;
        alpha = opt->params.adam.alpha * sched;
        beta1 = opt->params.adam.beta1;
        beta2 = opt->params.adam.beta2;
        eps   = opt->params.adam.eps;

        x  = (float *)opt->adam.x->data;  // view of the parameters
        g1 = (float *)opt->adam.g1->data; // gradient
        g2 = (float *)opt->adam.g2->data; // gradient squared
        m  = (float *)opt->adam.m->data;  // first moment
        v  = (float *)opt->adam.v->data;  // second moment
        mh = (float *)opt->adam.mh->data; // first moment hat
        vh = (float *)opt->adam.vh->data; // second moment hat

        pf = opt->params.past > 0 ? (float *)opt->adam.pf->data : NULL; // past function values
    }

    static string about;                      // "this is a namespace for train only one epoch for adam"
    static struct ggml_opt_params opt_params; // default Adam optimizer parameters, defined below
    static struct ggml_opt_context * opt;     // defined below
    // static
    static struct ggml_tensor * gfbuf;        // defined below
    static struct ggml_tensor * gbbuf;        // defined below
    // build forward + backward compute graphs
    static struct ggml_cgraph * gf;           // defined below
    static struct ggml_cgraph * gb;           // defined below
    static int np;
    static int nx;
    static ggml_cgraph ge;                    // = a.ge

    // constants
    static float sched;
    static float decay;
    static float alpha;
    static float beta1;
    static float beta2;
    static float eps;

    static float * x;  // view of the parameters
    static float * g1; // gradient
    static float * g2; // gradient squared
    static float * m;  // first moment
    static float * v;  // second moment
    static float * mh; // first moment hat
    static float * vh; // second moment hat
    static float * pf; // past function values

    static int flag;   // true
};
string hf_adam::about = "this is a namespace for train only one epoch for adam";
struct ggml_opt_params hf_adam::opt_params = ggml_opt_default_params(GGML_OPT_ADAM); // default Adam optimizer parameters
struct ggml_opt_context * hf_adam::opt = (struct ggml_opt_context *) alloca(sizeof(struct ggml_opt_context));
struct ggml_tensor * hf_adam::gfbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32) + (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0));
struct ggml_tensor * hf_adam::gbbuf = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, sizeof(struct ggml_cgraph) / ggml_type_size(GGML_TYPE_I32) + (sizeof(struct ggml_cgraph) % ggml_type_size(GGML_TYPE_I32) ? 1 : 0));
struct ggml_cgraph * hf_adam::gf = (struct ggml_cgraph *) hf_adam::gfbuf->data;
struct ggml_cgraph * hf_adam::gb = (struct ggml_cgraph *) hf_adam::gbbuf->data;
int hf_adam::np = 0;
int hf_adam::nx = 0;
struct ggml_cgraph hf_adam::ge = a.ge;

float hf_adam::sched = 0.f;
float hf_adam::decay = 0.f;
float hf_adam::alpha = 0.f;
float hf_adam::beta1 = 0.f;
float hf_adam::beta2 = 0.f;
float hf_adam::eps   = 0.f;

float * hf_adam::x  = nullptr; // view of the parameters
float * hf_adam::g1 = nullptr; // gradient
float * hf_adam::g2 = nullptr; // gradient squared
float * hf_adam::m  = nullptr; // first moment
float * hf_adam::v  = nullptr; // second moment
float * hf_adam::mh = nullptr; // first moment hat
float * hf_adam::vh = nullptr; // second moment hat
float * hf_adam::pf = nullptr; // past function values

int hf_adam::flag = 1;

#ifdef __cplusplus
extern "C" {
#endif

//
// ADAM
//
//   ref: https://arxiv.org/pdf/1412.6980.pdf
//

void ggml_opt_adam() {
    // update view
    ggml_opt_get_params(hf_adam::np, ps, hf_adam::x);

    struct ggml_tensor * f = a.e;

    // compute the function value
    ggml_graph_reset (hf_adam::gf);
    ggml_set_f32     (f->grad, 1.0f);

    ggml_graph_compute_with_ctx(ctx, hf_adam::gb, hf_adam::opt->params.n_threads);

    hf_adam::opt->adam.fx_prev = ggml_get_f32_1d(f, 0);
    hf_adam::opt->adam.fx_best = hf_adam::opt->adam.fx_prev;
    if (hf_adam::pf) {
        hf_adam::pf[hf_adam::opt->iter % hf_adam::opt->params.past] = hf_adam::opt->adam.fx_prev;
    }

    // initialize
    if (hf_adam::opt->just_initialized) {
        // opt->adam.n_no_improvement = 0; // *** stack smashing detected ***: terminated
        hf_adam::opt->just_initialized = false;
    }

    float * fx_best = &hf_adam::opt->adam.fx_best;
    float * fx_prev = &hf_adam::opt->adam.fx_prev;
    int * n_no_improvement = &hf_adam::opt->adam.n_no_improvement;

    int iter0 = hf_adam::opt->iter;

    for (int t = 0; t < 50; ++t) {
    // for (int t = 0; t < params.adam.n_iter; ++t) {
        // cout << t << endl;
        hf_adam::opt->iter = iter0 + t + 1;
        GGML_PRINT_DEBUG  ("=== iter %d ===\n", t);

        GGML_PRINT_DEBUG  ("f      = %10.6f\n", ggml_get_f32_1d(f, 0));
        GGML_PRINT_DEBUG_5("df/dx0 = %10.6f\n", ggml_get_f32_1d(ps[0]->grad, 0));
        GGML_PRINT_DEBUG_5("df/dx1 = %10.6f\n", ggml_get_f32_1d(ps[1]->grad, 0));

        for (int i = 0; i < hf_adam::np; ++i) {
            GGML_PRINT_DEBUG("param %d: %10.6f, g = %10.6f\n", i,
                    ggml_get_f32_1d(ps[i], 0), ggml_get_f32_1d(ps[i]->grad, 0));
        }

        const int64_t t_start_wall = ggml_time_us();
        const int64_t t_start_cpu  = ggml_cycles();
        // UNUSED(t_start_wall);
        // UNUSED(t_start_cpu);

        {
            // update the gradient
            ggml_opt_get_grad(hf_adam::np, ps, hf_adam::g1); // np: 8, ps: ggml_tensor * [256], g1: per-parameter gradients used for the first-moment estimate

            // m_t = beta1*m_t-1 + (1 - beta1)*g_t
            ggml_vec_scale_f32(hf_adam::nx, hf_adam::m, hf_adam::beta1);
            ggml_vec_mad_f32  (hf_adam::nx, hf_adam::m, hf_adam::g1, 1.0f - hf_adam::beta1);

            // g2 = g1^2
            ggml_vec_sqr_f32  (hf_adam::nx, hf_adam::g2, hf_adam::g1);

            // v_t = beta2*v_t-1 + (1 - beta2)*g_t^2
            ggml_vec_scale_f32(hf_adam::nx, hf_adam::v, hf_adam::beta2);
            ggml_vec_mad_f32  (hf_adam::nx, hf_adam::v, hf_adam::g2, 1.0f - hf_adam::beta2);

            // m^hat = m_t / (1 - beta1^t)
            // v^hat = v_t / (1 - beta2^t)
            // x_t = x_t-1 - sched*(alpha*m^hat/(sqrt(v^hat) + eps) + decay*x_t-1)
            // x_t = x_t-1 - sched*alpha*m^hat/(sqrt(v^hat) + eps) - sched*decay*x_t-1
            // x_t = x_t-1*(1-sched*decay) - sched*alpha*m^hat/(sqrt(v^hat) + eps)
            // x_t = x_t-1*(1-sched*decay) + sched*decay*(-alpha/decay)*m^hat/(sqrt(v^hat) + eps)
            // x_t = mix(x_t-1, (-alpha/decay)*m^hat/(sqrt(v^hat) + eps), sched*decay)
            ggml_vec_cpy_f32  (hf_adam::nx, hf_adam::mh, hf_adam::m);
            ggml_vec_cpy_f32  (hf_adam::nx, hf_adam::vh, hf_adam::v);

            ggml_vec_scale_f32(hf_adam::nx, hf_adam::mh, hf_adam::alpha/(1.0f - powf(hf_adam::beta1, hf_adam::opt->iter)));
            ggml_vec_scale_f32(hf_adam::nx, hf_adam::vh,  1.0f/(1.0f - powf(hf_adam::beta2, hf_adam::opt->iter)));

            ggml_vec_sqrt_f32 (hf_adam::nx, hf_adam::vh, hf_adam::vh);
            ggml_vec_acc1_f32 (hf_adam::nx, hf_adam::vh, hf_adam::eps);

            ggml_vec_div_f32  (hf_adam::nx, hf_adam::mh, hf_adam::mh, hf_adam::vh);
            ggml_vec_scale_f32(hf_adam::nx, hf_adam::x, 1.0f - hf_adam::decay);
            ggml_vec_sub_f32  (hf_adam::nx, hf_adam::x, hf_adam::x, hf_adam::mh);

            // update the parameters
            ggml_opt_set_params(hf_adam::np, ps, hf_adam::x);
        }

        ggml_graph_reset (hf_adam::gf);
        ggml_set_f32     (f->grad, 1.0f);

        ggml_graph_compute_with_ctx(ctx, hf_adam::gb, hf_adam::opt->params.n_threads);

        const float fx = ggml_get_f32_1d(f, 0);

        // check convergence
        if (fabsf(fx - fx_prev[0])/fx < hf_adam::opt->params.adam.eps_f) {
            GGML_PRINT_DEBUG("converged\n");
            return;
        }

        // delta-based convergence test
        if (hf_adam::pf != NULL) {
            // need at least params.past iterations to start checking for convergence
            if (hf_adam::opt->params.past <= iter0 + t) {
                const float rate = (hf_adam::pf[(iter0 + t)%hf_adam::opt->params.past] - fx)/fx;

                if (fabsf(rate) < hf_adam::opt->params.delta) {
                    return;
                }
            }

            hf_adam::pf[(iter0 + t)%hf_adam::opt->params.past] = fx;
        }

        fx_prev[0] = fx;

        {
            const int64_t t_end_cpu = ggml_cycles();
            GGML_PRINT_DEBUG("time iter:      %5.3f s\n", ((float)(t_end_cpu - t_start_cpu))/CLOCKS_PER_SEC);
            // UNUSED(t_end_cpu);

            const int64_t t_end_wall = ggml_time_us();
            GGML_PRINT_DEBUG("wall time iter: %5.3f s\n", (t_end_wall - t_start_wall)/1e6);
            // UNUSED(t_end_wall);
        }
    }

    ggml_graph_reset(&a.ge);                                  // reset the compute graph
    ggml_graph_compute_with_ctx(ctx, &a.ge, /*n_threads*/ 1); // run the forward graph with this context
    float fe = ggml_get_f32_1d(a.e, 0);                       // read the first element of e
    printf("%s: e = %.4f\n", __func__, fe);                   // print the loss value
    hf_out(a.output);
    return;
}

void hf_play_start_train() {
    ggml_cgraph ge = a.ge;

    if (hf_adam::flag == 1) {
        hf_adam mhf_adam; // running the constructor performs the one-time initialization
        hf_adam::flag = 0;
    }

    // while (1) {
    for (int i = 0; i < 2; ++i)
        if (*istrain) {
            hf_set_data_random();
            // ggml_opt(ctx, opt_params, e); // optimize tensor e with the given optimizer params
            cout << "pppppppppppppppppppppppppppppppppppppppppppppppppp" << endl;
            cout << hf_adam::about << endl;
            // ggml_opt_adam(ctx, hf_adam::opt, hf_adam::opt->params, a.e, hf_adam::gf, hf_adam::gb);
            // ggml_opt_resume_g(ctx, hf_adam::opt, a.e, hf_adam::gf, hf_adam::gb);
            ggml_opt_adam();
            ggml_opt_adam();
            ggml_opt_adam();
            ggml_opt_adam();
            ggml_opt_adam();
            // ggml_opt_adam(ctx, hf_adam::opt, hf_adam::opt->params, a.e, hf_adam::gf, hf_adam::gb);
            // hf_set_data_random();
            ggml_graph_reset(&a.ge);                                  // reset the compute graph
            ggml_graph_compute_with_ctx(ctx, &a.ge, /*n_threads*/ 1); // run the forward graph with this context
            float fe = ggml_get_f32_1d(a.e, 0);                       // read the first element of e
            printf("%s: e = %.4f\n", __func__, fe);                   // print the loss value
            hf_out(a.output);
            *istrain = 0;
        } else {
            cout << "oooooooooooooooooooooooooooooooooooooooooooooooo" << endl;
            ggml_graph_reset(&a.ge);                                  // reset the compute graph
            ggml_graph_compute_with_ctx(ctx, &a.ge, /*n_threads*/ 1); // run the forward graph with this context
            float fe = ggml_get_f32_1d(a.e, 0);                       // read the first element of e
            printf("%s: e = %.4f\n", __func__, fe);                   // print the loss value
            hf_out(a.output);
            // for (int i = 0; i < 10000000; ++i);
            *istrain = 1;
        }
    // }
    return;
}

#ifdef __cplusplus
}
#endif

int main(void) {
    // for (int index = 0; index < 100; ++index) {
    //     hf_play_start_train();
    // }
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    hf_play_start_train();
    return 0;
}