diff --git a/.gitignore b/.gitignore index adf8f72..8f4aca8 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,11 @@ # Go workspace file go.work +/media +/misc +/vendor +/tmp +.idea +.vscode +.DS_Store +.git diff --git a/README.md b/README.md index 28a0336..3b1d8d7 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,73 @@ -# fafa-crawler +# Go Crawler Project -发发图片库爬虫数据处理项目 \ No newline at end of file +该项目是一个使用 Go 1.25 构建的简单爬虫项目 fafa-crawler。它利用 Fiber v3 Web 框架处理 HTTP 请求,并使用最新版 GORM Gen 进行数据库操作。项目结构分为多个模块,以便于组织和维护。 + +项目采用最简架构:使用 TOML 配置,数据操作使用最新版 GORM Gen,Web 框架使用 Fiber v3,并在 src 下划分 mapper、services、models、util、controller 等模块。 + +## 项目结构 + +``` +fafa-crawler +├── config +│ └── config.toml # 数据库和爬虫设置的配置文件 +├── src +│ ├── controller # 包含 HTTP 请求处理程序 +│ │ └── controller.go +│ ├── mapper # 数据映射层,负责 CRUD 操作 +│ │ └── mapper.go +│ ├── models # 使用 GORM 定义的数据库模型 +│ │ └── model.go +│ ├── services # 业务逻辑层 +│ │ └── service.go +│ ├── util # 常用工具函数 +│ │ └── util.go +│ └── main.go # 应用程序的入口点 +├── go.mod # Go 模块配置文件 +├── go.sum # 依赖项版本信息 +└── README.md # 项目文档 +``` + +## 安装步骤 + +1. 克隆仓库: + + ``` + git clone <仓库地址> + cd fafa-crawler + ``` + +2. 安装依赖: + + ``` + go mod tidy + ``` + +3. 
在根目录创建 `.env` 文件以设置环境变量(如有必要)。 + +## 配置 + +编辑 `config/config.toml` 文件以设置数据库连接和爬虫配置。 + +## 运行应用程序 + +使用以下命令运行应用程序: + +``` +go run src/main.go +``` + +服务器将在环境变量 `PORT` 指定的端口上启动,默认为 `3000`。 + +## 使用说明 + +- **GET /data**: 从服务器检索数据。 +- **POST /data**: 向服务器发送数据。 + +## 开发 + +- 按照上述结构添加新功能或模块。 +- 确保为新功能编写测试。 + +## 许可证 + +该项目根据 MIT 许可证进行许可。有关详细信息,请参阅 LICENSE 文件。 diff --git a/config/config.toml b/config/config.toml new file mode 100644 index 0000000..a0713cf --- /dev/null +++ b/config/config.toml @@ -0,0 +1,25 @@ +[mysql] +host = "localhost" +port = 3306 +user = "root" +password = "1234567890" +name = "goods_library" + +[mysql_prod] +host = "43.139.72.196" +port = 13307 +user = "webprod" +password = "v9W2ER6KG9HVtH1mjAytRvdr" +name = "mall_prod" + +max_idle_conns = 10 +max_open_conns = 100 +conn_max_lifetime = 1 # 小时 +conn_max_idle_time = 30 # 分钟 + +# COS配置 +[cossdk] +bucket_url = "https://media-mall-prod-1259811287.cos.ap-guangzhou.myqcloud.com" +secret_id = "AKIDOkvMzXt58zw5RrCLI5jLo8JKBtRp7FC7" +secret_key = "4xkSfgqCXoAQZgaO1Tn3A5oMvSsDNk3D" +base_path = "media/images/goods_library" diff --git a/config/gen_mapper.go b/config/gen_mapper.go new file mode 100755 index 0000000..c2e12cc --- /dev/null +++ b/config/gen_mapper.go @@ -0,0 +1,62 @@ +package main + +import ( + "strings" + + _ "github.com/go-sql-driver/mysql" + "gorm.io/driver/mysql" + "gorm.io/gen" + "gorm.io/gorm" +) + +func main() { + dsn := "root:1234567890@tcp(localhost:3306)/goods_library?charset=utf8mb4&parseTime=True&loc=Local" + db, err := gorm.Open(mysql.Open(dsn), &gorm.Config{}) + if err != nil { + panic("gorm 打开数据库出错:" + err.Error()) + } + + g := gen.NewGenerator(gen.Config{ + OutPath: "./src/mapper", + ModelPkgPath: "./src/models", + Mode: gen.WithDefaultQuery | gen.WithQueryInterface | gen.WithoutContext, + FieldNullable: false, + FieldCoverable: false, + FieldSignable: true, + FieldWithIndexTag: true, + FieldWithTypeTag: true, + }) + + g.UseDB(db) + + tbNames, err := db.Migrator().GetTables() + if 
err != nil { + panic("get all tables fail: " + err.Error()) + } + + metas := make([]interface{}, 0) + for _, v := range tbNames { + if !strings.Contains(v, "_20") && !strings.Contains(v, "_default") { + metas = append(metas, g.GenerateModel(v)) + } + } + + g.ApplyBasic(metas...) + + // g.ApplyBasic(g.GenerateAllTable()...) + + // g.ApplyBasic( + // g.GenerateModel("account_balance_log"), + // ) + + g.Execute() + + // 批量替换文件夹的内容 + // modelPath, err := filepath.Abs("./src/models") + // log.Println("models path is:" + modelPath) + // if err != nil { + // log.Println(err) + // } + + // log.Println("models file have replace") +} diff --git a/fafa-crawler b/fafa-crawler new file mode 100755 index 0000000..40e6b24 Binary files /dev/null and b/fafa-crawler differ diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..617347c --- /dev/null +++ b/go.mod @@ -0,0 +1,83 @@ +module fafa-crawler + +go 1.25.0 + +require ( + github.com/go-sql-driver/mysql v1.9.3 + github.com/goccy/go-json v0.10.5 + github.com/gocolly/colly v1.2.0 + github.com/gofiber/fiber/v3 v3.0.0-rc.3 + github.com/google/uuid v1.6.0 + github.com/joho/godotenv v1.5.1 + github.com/json-iterator/go v1.1.12 + github.com/mozillazg/go-pinyin v0.21.0 + github.com/spf13/viper v1.21.0 + github.com/tencentyun/cos-go-sdk-v5 v0.7.71 + github.com/tidwall/gjson v1.18.0 + github.com/xuri/excelize/v2 v2.10.0 + gorm.io/driver/mysql v1.6.0 + gorm.io/gen v0.3.27 + gorm.io/gorm v1.31.1 + gorm.io/plugin/dbresolver v1.6.2 +) + +require ( + filippo.io/edwards25519 v1.1.0 // indirect + github.com/PuerkitoBio/goquery v1.11.0 // indirect + github.com/andybalholm/brotli v1.2.0 // indirect + github.com/andybalholm/cascadia v1.3.3 // indirect + github.com/antchfx/htmlquery v1.3.5 // indirect + github.com/antchfx/xmlquery v1.5.0 // indirect + github.com/antchfx/xpath v1.3.5 // indirect + github.com/clbanning/mxj v1.8.4 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/go-viper/mapstructure/v2 v2.4.0 // 
indirect + github.com/gobwas/glob v0.2.3 // indirect + github.com/gofiber/schema v1.6.0 // indirect + github.com/gofiber/utils/v2 v2.0.0-rc.2 // indirect + github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect + github.com/golang/protobuf v1.5.4 // indirect + github.com/google/go-querystring v1.0.0 // indirect + github.com/jinzhu/inflection v1.0.0 // indirect + github.com/jinzhu/now v1.1.5 // indirect + github.com/kennygrant/sanitize v1.2.4 // indirect + github.com/klauspost/compress v1.18.1 // indirect + github.com/mattn/go-colorable v0.1.14 // indirect + github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mitchellh/mapstructure v1.4.3 // indirect + github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect + github.com/modern-go/reflect2 v1.0.2 // indirect + github.com/mozillazg/go-httpheader v0.2.1 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect + github.com/philhofer/fwd v1.2.0 // indirect + github.com/richardlehane/mscfb v1.0.4 // indirect + github.com/richardlehane/msoleps v1.0.4 // indirect + github.com/sagikazarmark/locafero v0.12.0 // indirect + github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect + github.com/spf13/afero v1.15.0 // indirect + github.com/spf13/cast v1.10.0 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/subosito/gotenv v1.6.0 // indirect + github.com/temoto/robotstxt v1.1.2 // indirect + github.com/tidwall/match v1.2.0 // indirect + github.com/tidwall/pretty v1.2.1 // indirect + github.com/tiendc/go-deepcopy v1.7.1 // indirect + github.com/tinylib/msgp v1.5.0 // indirect + github.com/valyala/bytebufferpool v1.0.0 // indirect + github.com/valyala/fasthttp v1.68.0 // indirect + github.com/xuri/efp v0.0.1 // indirect + github.com/xuri/nfp v0.0.2-0.20250530014748-2ddeb826f9a9 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/crypto v0.45.0 // indirect + golang.org/x/mod v0.30.0 // indirect + golang.org/x/net v0.47.0 
// indirect + golang.org/x/sync v0.18.0 // indirect + golang.org/x/sys v0.38.0 // indirect + golang.org/x/text v0.31.0 // indirect + golang.org/x/tools v0.39.0 // indirect + google.golang.org/appengine v1.6.8 // indirect + google.golang.org/protobuf v1.36.10 // indirect + gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 // indirect + gorm.io/datatypes v1.2.7 // indirect + gorm.io/hints v1.1.2 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..3ec2bdf --- /dev/null +++ b/go.sum @@ -0,0 +1,292 @@ +filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= +filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= +github.com/PuerkitoBio/goquery v1.11.0 h1:jZ7pwMQXIITcUXNH83LLk+txlaEy6NVOfTuP43xxfqw= +github.com/PuerkitoBio/goquery v1.11.0/go.mod h1:wQHgxUOU3JGuj3oD/QFfxUdlzW6xPHfqyHre6VMY4DQ= +github.com/andybalholm/brotli v1.2.0 h1:ukwgCxwYrmACq68yiUqwIWnGY0cTPox/M94sVwToPjQ= +github.com/andybalholm/brotli v1.2.0/go.mod h1:rzTDkvFWvIrjDXZHkuS16NPggd91W3kUSvPlQ1pLaKY= +github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM= +github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA= +github.com/antchfx/htmlquery v1.3.5 h1:aYthDDClnG2a2xePf6tys/UyyM/kRcsFRm+ifhFKoU0= +github.com/antchfx/htmlquery v1.3.5/go.mod h1:5oyIPIa3ovYGtLqMPNjBF2Uf25NPCKsMjCnQ8lvjaoA= +github.com/antchfx/xmlquery v1.5.0 h1:uAi+mO40ZWfyU6mlUBxRVvL6uBNZ6LMU4M3+mQIBV4c= +github.com/antchfx/xmlquery v1.5.0/go.mod h1:lJfWRXzYMK1ss32zm1GQV3gMIW/HFey3xDZmkP1SuNc= +github.com/antchfx/xpath v1.3.5 h1:PqbXLC3TkfeZyakF5eeh3NTWEbYl4VHNVeufANzDbKQ= +github.com/antchfx/xpath v1.3.5/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs= +github.com/clbanning/mxj v1.8.4 h1:HuhwZtbyvyOw+3Z1AowPkU87JkJUSv751ELWaiTpj8I= +github.com/clbanning/mxj v1.8.4/go.mod h1:BVjHeAH+rl9rs6f+QIpeRl0tfu10SXn1pUSa5PVGJng= +github.com/davecgh/go-spew v1.1.0/go.mod 
h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/fxamacker/cbor/v2 v2.9.0 h1:NpKPmjDBgUfBms6tr6JZkTHtfFGcMKsw3eGcmD/sapM= +github.com/fxamacker/cbor/v2 v2.9.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ= +github.com/go-sql-driver/mysql v1.9.3 h1:U/N249h2WzJ3Ukj8SowVFjdtZKfu9vlLZxjPXV1aweo= +github.com/go-sql-driver/mysql v1.9.3/go.mod h1:qn46aNg1333BRMNU69Lq93t8du/dwxI64Gl8i5p1WMU= +github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= +github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= +github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= +github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= +github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= +github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= +github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= +github.com/gofiber/fiber/v3 v3.0.0-rc.3 h1:h0KXuRHbivSslIpoHD1R/XjUsjcGwt+2vK0avFiYonA= +github.com/gofiber/fiber/v3 v3.0.0-rc.3/go.mod h1:LNBPuS/rGoUFlOyy03fXsWAeWfdGoT1QytwjRVNSVWo= +github.com/gofiber/schema v1.6.0 h1:rAgVDFwhndtC+hgV7Vu5ItQCn7eC2mBA4Eu1/ZTiEYY= +github.com/gofiber/schema v1.6.0/go.mod h1:WNZWpQx8LlPSK7ZaX0OqOh+nQo/eW2OevsXs1VZfs/s= 
+github.com/gofiber/utils/v2 v2.0.0-rc.2 h1:NvJTf7yMafTq16lUOJv70nr+HIOLNQcvGme/X+ftbW8= +github.com/gofiber/utils/v2 v2.0.0-rc.2/go.mod h1:gXins5o7up+BQFiubmO8aUJc/+Mhd7EKXIiAK5GBomI= +github.com/golang-jwt/jwt/v5 v5.2.3/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0ktxqI+Sida1w446QrXBRJ0nee3SNZlA= +github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0= +github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A= +github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI= +github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= +github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/go-querystring v1.0.0 h1:Xkwi/a1rcvNg1PPYe5vI8GbeBY/jrVuDX5ASuANWTrk= +github.com/google/go-querystring v1.0.0/go.mod h1:odCYkC5MyYFN7vkCjXpyrEuKhc/BUO6wN/zVPAxq5ck= +github.com/google/gofuzz v1.0.0/go.mod 
h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= +github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= +github.com/jackc/pgpassfile v1.0.0/go.mod h1:CEx0iS5ambNFdcRtxPj5JhEz+xB6uRky5eyVu/W2HEg= +github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9 h1:L0QtFUgDarD7Fpv9jeVMgy/+Ec0mtnmYuImjTz6dtDA= +github.com/jackc/pgservicefile v0.0.0-20231201235250-de7065d80cb9/go.mod h1:5TJZWKEWniPve33vlWYSoGYefn3gLQRzjfDlhSJ9ZKM= +github.com/jackc/pgx/v5 v5.5.5 h1:amBjrZVmksIdNjxGW/IiIMzxMKZFelXbUoPNb+8sjQw= +github.com/jackc/pgx/v5 v5.5.5/go.mod h1:ez9gk+OAat140fv9ErkZDYFWmXLfV+++K0uAOiwgm1A= +github.com/jackc/puddle/v2 v2.2.1 h1:RhxXJtFG022u4ibrCSMSiu5aOq1i77R3OHKNJj77OAk= +github.com/jackc/puddle/v2 v2.2.1/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= +github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= +github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= +github.com/klauspost/compress v1.18.1 
h1:bcSGx7UbpBqMChDtsF28Lw6v/G94LPrrbMbdC3JH2co= +github.com/klauspost/compress v1.18.1/go.mod h1:ZQFFVG+MdnR0P+l6wpXgIL4NTtwiKIdBnrBd8Nrxr+0= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHPsaIE= +github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-sqlite3 v1.14.15/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/mattn/go-sqlite3 v1.14.16/go.mod h1:2eHXhiwb8IkHr+BDWZGa96P6+rkvnG63S2DGjv9HUNg= +github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/microsoft/go-mssqldb v1.7.2 h1:CHkFJiObW7ItKTJfHo1QX7QBBD1iV+mn1eOyRP3b/PA= +github.com/microsoft/go-mssqldb v1.7.2/go.mod h1:kOvZKUdrhhFQmxLZqbwUV0rHkNkZpthMITIb2Ko1IoA= +github.com/mitchellh/mapstructure v1.4.3 h1:OVowDSCllw/YjdLkam3/sm7wEtOy59d8ndGgCcyj8cs= +github.com/mitchellh/mapstructure v1.4.3/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= +github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= +github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 
+github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/mozillazg/go-httpheader v0.2.1 h1:geV7TrjbL8KXSyvghnFm+NyTux/hxwueTSrwhe88TQQ= +github.com/mozillazg/go-httpheader v0.2.1/go.mod h1:jJ8xECTlalr6ValeXYdOF8fFUISeBAdw6E61aqQma60= +github.com/mozillazg/go-pinyin v0.21.0 h1:Wo8/NT45z7P3er/9YSLHA3/kjZzbLz5hR7i+jGeIGao= +github.com/mozillazg/go-pinyin v0.21.0/go.mod h1:iR4EnMMRXkfpFVV5FMi4FNB6wGq9NV6uDWbUuPhP4Yc= +github.com/pelletier/go-toml/v2 v2.2.4 h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/philhofer/fwd v1.2.0 h1:e6DnBTl7vGY+Gz322/ASL4Gyp1FspeMvx1RNDoToZuM= +github.com/philhofer/fwd v1.2.0/go.mod h1:RqIHx9QI14HlwKwm98g9Re5prTQ6LdeRQn+gXJFxsJM= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/richardlehane/mscfb v1.0.4 h1:WULscsljNPConisD5hR0+OyZjwK46Pfyr6mPu5ZawpM= +github.com/richardlehane/mscfb v1.0.4/go.mod h1:YzVpcZg9czvAuhk9T+a3avCpcFPMUWm7gK3DypaEsUk= +github.com/richardlehane/msoleps v1.0.1/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/richardlehane/msoleps v1.0.4 h1:WuESlvhX3gH2IHcd8UqyCuFY5yiq/GR/yqaSM/9/g00= +github.com/richardlehane/msoleps v1.0.4/go.mod h1:BWev5JBpU9Ko2WAgmZEuiz4/u3ZYTKbjLycmwiWUfWg= +github.com/rogpeppe/go-internal v1.9.0 h1:73kH8U+JUqXU8lRuOHeVHaa/SZPifC7BkcraZVejAe8= +github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= +github.com/rs/dnscache v0.0.0-20230804202142-fc85eb664529/go.mod h1:qe5TWALJ8/a1Lqznoc5BDHpYX/8HU60Hm2AwRmqzxqA= +github.com/sagikazarmark/locafero v0.12.0 h1:/NQhBAkUb4+fH1jivKHWusDYFjMOOKU88eegjfxfHb4= +github.com/sagikazarmark/locafero v0.12.0/go.mod h1:sZh36u/YSZ918v0Io+U9ogLYQJ9tLLBmM4eneO6WwsI= +github.com/saintfish/chardet 
v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7AwssoOcM/tq5JjjG2yYOc8odClEiXA= +github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= +github.com/shamaton/msgpack/v2 v2.4.0 h1:O5Z08MRmbo0lA9o2xnQ4TXx6teJbPqEurqcCOQ8Oi/4= +github.com/shamaton/msgpack/v2 v2.4.0/go.mod h1:6khjYnkx73f7VQU7wjcFS9DFjs+59naVWJv1TB7qdOI= +github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= +github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg= +github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY= +github.com/spf13/cast v1.10.0/go.mod h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= +github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= +github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg= +github.com/temoto/robotstxt v1.1.2/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= +github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/common v1.0.563/go.mod h1:7sCQWVkxcsR38nffDW057DRGk8mUjK1Ing/EFOK8s8Y= +github.com/tencentcloud/tencentcloud-sdk-go/tencentcloud/kms v1.0.563/go.mod 
h1:uom4Nvi9W+Qkom0exYiJ9VWJjXwyxtPYTkKkaLMlfE0= +github.com/tencentyun/cos-go-sdk-v5 v0.7.71 h1:dV0doQK6k0MTdNIIWqP23ESvlPPI1ZZCCIBZGjsWR2Y= +github.com/tencentyun/cos-go-sdk-v5 v0.7.71/go.mod h1:STbTNaNKq03u+gscPEGOahKzLcGSYOj6Dzc5zNay7Pg= +github.com/tencentyun/qcloud-cos-sts-sdk v0.0.0-20250515025012-e0eec8a5d123/go.mod h1:b18KQa4IxHbxeseW1GcZox53d7J0z39VNONTxvvlkXw= +github.com/tidwall/gjson v1.18.0 h1:FIDeeyB800efLX89e5a8Y0BNH+LOngJyGrIWxG2FKQY= +github.com/tidwall/gjson v1.18.0/go.mod h1:/wbyibRr2FHMks5tjHJ5F8dMZh3AcwJEMf5vlfC0lxk= +github.com/tidwall/match v1.1.1/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/match v1.2.0 h1:0pt8FlkOwjN2fPt4bIl4BoNxb98gGHN2ObFEDkrfZnM= +github.com/tidwall/match v1.2.0/go.mod h1:eRSPERbgtNPcGhD8UCthc6PmLEQXEWd3PRB5JTxsfmM= +github.com/tidwall/pretty v1.2.0/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tidwall/pretty v1.2.1 h1:qjsOFOWWQl+N3RsoF5/ssm1pHmJJwhjlSbZ51I6wMl4= +github.com/tidwall/pretty v1.2.1/go.mod h1:ITEVvHYasfjBbM0u2Pg8T2nJnzm8xPwvNhhsoaGGjNU= +github.com/tiendc/go-deepcopy v1.7.1 h1:LnubftI6nYaaMOcaz0LphzwraqN8jiWTwm416sitff4= +github.com/tiendc/go-deepcopy v1.7.1/go.mod h1:4bKjNC2r7boYOkD2IOuZpYjmlDdzjbpTRyCx+goBCJQ= +github.com/tinylib/msgp v1.5.0 h1:GWnqAE54wmnlFazjq2+vgr736Akg58iiHImh+kPY2pc= +github.com/tinylib/msgp v1.5.0/go.mod h1:cvjFkb4RiC8qSBOPMGPSzSAx47nAsfhLVTCZZNuHv5o= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.68.0 h1:v12Nx16iepr8r9ySOwqI+5RBJ/DqTxhOy1HrHoDFnok= +github.com/valyala/fasthttp v1.68.0/go.mod h1:5EXiRfYQAoiO/khu4oU9VISC/eVY6JqmSpPJoHCKsz4= +github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= +github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xuri/efp v0.0.1 
h1:fws5Rv3myXyYni8uwj2qKjVaRP30PdjeYe2Y6FDsCL8= +github.com/xuri/efp v0.0.1/go.mod h1:ybY/Jr0T0GTCnYjKqmdwxyxn2BQf2RcQIIvex5QldPI= +github.com/xuri/excelize/v2 v2.10.0 h1:8aKsP7JD39iKLc6dH5Tw3dgV3sPRh8uRVXu/fMstfW4= +github.com/xuri/excelize/v2 v2.10.0/go.mod h1:SC5TzhQkaOsTWpANfm+7bJCldzcnU/jrhqkTi/iBHBU= +github.com/xuri/nfp v0.0.2-0.20250530014748-2ddeb826f9a9 h1:+C0TIdyyYmzadGaL/HBLbf3WdLgC29pgyhTjAT/0nuE= +github.com/xuri/nfp v0.0.2-0.20250530014748-2ddeb826f9a9/go.mod h1:WwHg+CVyzlv/TX9xqBFXEZAuxOPxn2k1GNHwG41IIUQ= +github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= +github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.13.0/go.mod h1:y6Z2r+Rw4iayiXXAIxJIDAJ1zMW4yaTpebo8fPOliYc= +golang.org/x/crypto v0.19.0/go.mod h1:Iy9bg/ha4yyC70EfRS8jz+B6ybOBKMaSxLj6P6oBDfU= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= +golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk= +golang.org/x/crypto v0.45.0 h1:jMBrvKuj23MTlT0bQEOBcAE0mjg8mK9RXFhRH6nyF3Q= +golang.org/x/crypto v0.45.0/go.mod h1:XTGrrkGJve7CYK7J8PEww4aY7gM3qMCElcJQ8n8JdX4= +golang.org/x/image v0.25.0 h1:Y6uW6rH1y5y/LK1J8BPWZtr6yZ7hrsy6hFrXjgsc2fQ= +golang.org/x/image v0.25.0/go.mod h1:tCAmOEGthTtkalusGp1g3xa2gke8J6c2N565dTyl9Rs= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.8.0/go.mod 
h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/mod v0.15.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= +golang.org/x/mod v0.30.0 h1:fDEXFVZ/fmCKProc/yAXXUijritrDzahmwwefnjoPFk= +golang.org/x/mod v0.30.0/go.mod h1:lAsf5O2EvJeSFMiBxXDki7sCgAxEUcZHXoXMKT4GJKc= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= +golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg= +golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk= +golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= +golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= +golang.org/x/net v0.47.0 h1:Mx+4dIFzqraBXUugkia1OOvlD6LemFo1ALMHjrXDOhY= +golang.org/x/net v0.47.0/go.mod h1:/jNxtkgq5yWUGYkaZGqo27cfGZ1c5Nen03aYrrKpVRU= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.3.0/go.mod h1:FU7BRWz2tNW+3quACPkgCx/L+uEAv1htQ0V83Z9Rj+Y= +golang.org/x/sync v0.6.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0/go.mod 
h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.18.0 h1:kr88TuHDroi+UVf+0hZnirlk8o8T+4MrK6mr60WkH/I= +golang.org/x/sync v0.18.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= +golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXctD9OcfyVLyj2J3IxLnKwHJR8f4D8a3YE= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= +golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo= +golang.org/x/term v0.12.0/go.mod 
h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU= +golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= +golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE= +golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= +golang.org/x/text v0.31.0 h1:aC8ghyu4JhP8VojJ2lEHBnochRno1sgL6nEi9WGFGMM= +golang.org/x/text v0.31.0/go.mod h1:tKRAlv61yKIjGGHX/4tP1LTbc13YSec1pxVEWXzfoeM= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.6.0/go.mod h1:Xwgl3UAJ/d3gWutnCtw505GrjyAbvKui8lOU390QaIU= +golang.org/x/tools v0.13.0/go.mod h1:HvlwmtVNQAhOuCjW7xxvovg8wbNq7LwfXh/k7wXUl58= +golang.org/x/tools v0.21.1-0.20240508182429-e35e4ccd0d2d/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= +golang.org/x/tools v0.39.0 h1:ik4ho21kwuQln40uelmciQPp9SipgNDdrafrYA4TmQQ= +golang.org/x/tools v0.39.0/go.mod 
h1:JnefbkDPyD8UU2kI5fuf8ZX4/yUeh9W877ZeBONxUqQ= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM= +google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.36.10 h1:AYd7cD/uASjIL6Q9LiTjz8JLcrh/88q5UObnmY3aOOE= +google.golang.org/protobuf v1.36.10/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/datatypes v1.2.7 h1:ww9GAhF1aGXZY3EB3cJPJ7//JiuQo7DlQA7NNlVaTdk= +gorm.io/datatypes v1.2.7/go.mod h1:M2iO+6S3hhi4nAyYe444Pcb0dcIiOMJ7QHaUXxyiNZY= +gorm.io/driver/mysql v1.6.0 h1:eNbLmNTpPpTOVZi8MMxCi2aaIm0ZpInbORNXDwyLGvg= +gorm.io/driver/mysql v1.6.0/go.mod h1:D/oCC2GWK3M/dqoLxnOlaNKmXz8WNTfcS9y5ovaSqKo= +gorm.io/driver/postgres v1.5.0 h1:u2FXTy14l45qc3UeCJ7QaAXZmZfDDv0YrthvmRq1l0U= +gorm.io/driver/postgres v1.5.0/go.mod h1:FUZXzO+5Uqg5zzwzv4KK49R8lvGIyscBOqYrtI1Ce9A= +gorm.io/driver/sqlite v1.5.0/go.mod h1:kDMDfntV9u/vuMmz8APHtHF0b4nyBB7sfCieC6G8k8I= +gorm.io/driver/sqlite v1.6.0 h1:WHRRrIiulaPiPFmDcod6prc4l2VGVWHz80KspNsxSfQ= +gorm.io/driver/sqlite v1.6.0/go.mod 
h1:AO9V1qIQddBESngQUKWL9yoH93HIeA1X6V633rBwyT8= +gorm.io/driver/sqlserver v1.6.0 h1:VZOBQVsVhkHU/NzNhRJKoANt5pZGQAS1Bwc6m6dgfnc= +gorm.io/driver/sqlserver v1.6.0/go.mod h1:WQzt4IJo/WHKnckU9jXBLMJIVNMVeTu25dnOzehntWw= +gorm.io/gen v0.3.27 h1:ziocAFLpE7e0g4Rum69pGfB9S6DweTxK8gAun7cU8as= +gorm.io/gen v0.3.27/go.mod h1:9zquz2xD1f3Eb/eHq4oLn2z6vDVvQlCY5S3uMBLv4EA= +gorm.io/gorm v1.24.7-0.20230306060331-85eaf9eeda11/go.mod h1:L4uxeKpfBml98NYqVqwAdmV1a2nBtAec/cf3fpucW/k= +gorm.io/gorm v1.25.0/go.mod h1:L4uxeKpfBml98NYqVqwAdmV1a2nBtAec/cf3fpucW/k= +gorm.io/gorm v1.31.1 h1:7CA8FTFz/gRfgqgpeKIBcervUn3xSyPUmr6B2WXJ7kg= +gorm.io/gorm v1.31.1/go.mod h1:XyQVbO2k6YkOis7C2437jSit3SsDK72s7n7rsSHd+Gs= +gorm.io/hints v1.1.2 h1:b5j0kwk5p4+3BtDtYqqfY+ATSxjj+6ptPgVveuynn9o= +gorm.io/hints v1.1.2/go.mod h1:/ARdpUHAtyEMCh5NNi3tI7FsGh+Cj/MIUlvNxCNCFWg= +gorm.io/plugin/dbresolver v1.6.2 h1:F4b85TenghUeITqe3+epPSUtHH7RIk3fXr5l83DF8Pc= +gorm.io/plugin/dbresolver v1.6.2/go.mod h1:tctw63jdrOezFR9HmrKnPkmig3m5Edem9fdxk9bQSzM= diff --git a/main.go b/main.go new file mode 100644 index 0000000..8d58a92 --- /dev/null +++ b/main.go @@ -0,0 +1,45 @@ +package main + +import ( + "log" + "os" + + "github.com/gofiber/fiber/v3" + "github.com/gofiber/fiber/v3/middleware/logger" + "github.com/joho/godotenv" + + "fafa-crawler/src/controller" + // "fafa-crawler/src/services" +) + +func main() { + app := fiber.New() + + // 添加日志中间件 + app.Use(logger.New()) + + // Load environment variables from .env file + err := godotenv.Load() + if err != nil { + log.Fatalf("Error loading .env file") + } + + // Initialize controller and services + ctrl := controller.Controller{} + + // Set up routes + app.Get("/data", ctrl.GetData) + app.Post("/data", ctrl.PostData) + + // 美团数据同步商品库数据和图片 + app.Post("/meituan/data/sync", ctrl.SyncMeiTuanData) + // 思迅数据同步商品库数据和图片 + app.Post("/sixun/data/sync", ctrl.SyncSiXunGoodsData) + + // Start the server + port := os.Getenv("PORT") + if port == "" { + port = "3000" + } + 
log.Fatal(app.Listen(":"+port, fiber.ListenConfig{EnablePrefork: true})) +} diff --git a/output.html b/output.html new file mode 100644 index 0000000..b6d22b5 --- /dev/null +++ b/output.html @@ -0,0 +1,64 @@ +商品管理 + + + + + +
出售中
商品名称:
商品类型:
全部
商品分类:
全部
商品分组:
全部
高级筛选
出售中(-)
待出售(-)
已售罄(-)
仓库中(-)
库存紧张(-)
全部商品(-)
下架 分类 分组 更多
+
\ No newline at end of file diff --git a/runner.conf b/runner.conf new file mode 100644 index 0000000..dad2f49 --- /dev/null +++ b/runner.conf @@ -0,0 +1,14 @@ +root: . +tmp_path: ./tmp +build_name: runner-build +build_log: runner-build-errors.log +valid_ext: .go, .tpl, .tmpl, .html +no_rebuild_ext: .tpl, .tmpl, .html, .log, .gitignore +ignored: assets, tmp, vendor, testdata, media +build_delay: 600 +colors: 1 +log_color_main: cyan +log_color_build: yellow +log_color_runner: green +log_color_watcher: magenta +log_color_app: \ No newline at end of file diff --git a/src/beans/product_bean.go b/src/beans/product_bean.go new file mode 100644 index 0000000..043754c --- /dev/null +++ b/src/beans/product_bean.go @@ -0,0 +1,84 @@ +package beans + +import ( + "fafa-crawler/src/models" + "fafa-crawler/src/util" + "fmt" + "strconv" + "strings" +) + +type GoodsID struct { + ID string `json:"id"` +} + +type ProductBean struct { + Source string `json:"id"` + Name string `json:"title"` // 商品名 + Barcode string `json:"goods_code"` // 条形码/Barcode + Category string `json:"category"` // 商品分类 + Price string `json:"price"` // 商品原价 + Keywords string `json:"sub_title"` // 商品关键字 + Thumb string `json:"thumb"` // 商品缩略图 + ImageList []string `json:"thumbs"` // 商品图片列表 +} + +func (p *ProductBean) ConvProductModel() (*models.LibraryProduct, []*models.LibraryProductImage, error) { + priceFloat, err := strconv.ParseFloat(p.Price, 64) + if err != nil { + priceFloat = 0 // or handle error as needed + } + imgDomain := "http://shop.yunmadian.com/data/attachment/" + + product := &models.LibraryProduct{ + Source: fmt.Sprintf("heidouyun_%s", p.Source), + Name: p.Name, + Barcode: p.Barcode, + Category: p.Category, + Price: priceFloat, + Keywords: util.SplitTags2(p.Keywords), + } + if p.Thumb != "" { + product.Thumb = imgDomain + p.Thumb + } + + var productImages []*models.LibraryProductImage + if product.Thumb != "" { + productImages = append(productImages, &models.LibraryProductImage{ + ProductID: 
product.ID, + ImageURL: product.Thumb, + IsMain: 1, + }) + } + + for _, img := range p.ImageList { + if img != "" { + productImages = append(productImages, &models.LibraryProductImage{ + ProductID: product.ID, + ImageURL: imgDomain + img, + IsMain: 2, + }) + } + + } + + return product, productImages, nil +} + +func (p *ProductBean) IsValidBean() bool { + return p != nil && + !util.IsEmpty(p.Name) && + !strings.Contains(p.Name, "test") && + !strings.Contains(p.Name, "测试") && + len(p.ImageList) > 0 +} + +type ProductMeiTuanBean struct { + Product *models.LibraryProduct + ImageList []*models.LibraryProductImage +} + +type SiXunGoodsBean struct { + Product *models.LibraryProduct + ImageList []*models.LibraryProductImage +} diff --git a/src/colly_service/hdy_colly_service.go b/src/colly_service/hdy_colly_service.go new file mode 100644 index 0000000..21a41b1 --- /dev/null +++ b/src/colly_service/hdy_colly_service.go @@ -0,0 +1,216 @@ +package colly_service + +import ( + "fmt" + "log" + + "github.com/gocolly/colly" + jsoniter "github.com/json-iterator/go" + "github.com/tidwall/gjson" + + "fafa-crawler/src/beans" +) + +type HdyCollyService struct { +} + +func NewHdyCollyService() *HdyCollyService { + return &HdyCollyService{} +} + +func (s *HdyCollyService) initCollyCollector(cookieVal string) *colly.Collector { + collyCollector = colly.NewCollector( + colly.AllowURLRevisit(), + colly.IgnoreRobotsTxt(), + colly.AllowURLRevisit(), + colly.Async(false), + ) + + // 设置请求头 + collyCollector.OnRequest(func(r *colly.Request) { + r.Headers.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/137.0.0.0 Safari/537.36") + r.Headers.Set("content-type", "application/json; charset=UTF-8") + r.Headers.Set("Accept", "application/json, text/plain, */*") + r.Headers.Set("Accept-Encoding", "gzip, deflate, br") + r.Headers.Set("Accept-Language", "zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7") + r.Headers.Set("Referer", 
"https://shop.yunmadian.com/shop/manage/goods/list") + r.Headers.Set("X-Requested-With", "XMLHttpRequest") + r.Headers.Set("Cookie", cookieVal) + }) + + // 限速 + // collyCollector.Limit(&colly.LimitRule{ + // DomainGlob: "*yunmadian.com", + // DomainRegexp: "", + // Delay: 4 * time.Second, + // RandomDelay: 2, + // Parallelism: 1, + // }) + + return collyCollector +} + +// [{"id":"17511"},{"id":"17510"},{"id":"17509"},{"id":"17508"},{"id":"16626"},{"id":"16576"},{"id":"15849"},{"id":"15786"},{"id":"15647"},{"id":"15646"},{"id":"15645"},{"id":"15644"},{"id":"15643"},{"id":"15642"},{"id":"15641"},{"id":"15640"},{"id":"15639"},{"id":"15638"},{"id":"15637"},{"id":"15636"}] +func (s *HdyCollyService) StartCrawlProduct(cookieVal string, startPage, endPage int) []*beans.ProductBean { + fmt.Println("Initializing StartCrawlProduct...") + + if startPage < 1 || startPage > 187 { + return nil + } + + if endPage < startPage { + return nil + } + + productColly := s.initCollyCollector(cookieVal) + detailColly := s.initCollyCollector(cookieVal) + + productBeanList := []*beans.ProductBean{} + + productColly.OnRequest(func(r *colly.Request) { + fmt.Println("列表地址:", r.URL) + }) + + productColly.OnResponse(func(r *colly.Response) { + // log.Println("详情响应状态码:", string(r.Body)) + ids, err := ParseGoodsIDs(r) + if err != nil { + log.Println("解析商品ID失败:", err) + return + } + + // log.Println("商品ID列表:", ids) + + // 获取详情 + productBeanListTemp := s.StartCrawlDetail(detailColly, ids) + + if len(productBeanListTemp) > 0 { + productBeanList = append(productBeanList, productBeanListTemp...) 
+ } + }) + + productColly.OnError(func(r *colly.Response, err error) { + log.Printf("详情页请求失败: %v, URL: %s\n", err, r.Request.URL) + }) + + for i := startPage; i <= endPage; i++ { + reqURL := fmt.Sprintf("https://shop.yunmadian.com/shop/manage/goods/list?status=1&page=%d&pageSize=20", i) + productColly.Visit(reqURL) + } + + productColly.Wait() + + return productBeanList + +} + +func (s *HdyCollyService) StartCrawlDetail(detailColly *colly.Collector, ids []*beans.GoodsID) []*beans.ProductBean { + fmt.Println("StartCrawlDetail...") + + cnt := len(ids) + if cnt <= 0 { + fmt.Println("没有商品ID") + return nil + } + + ids = FilterDuplicateGoodsIDs(ids) + + productBeanList := []*beans.ProductBean{} + + detailColly.OnRequest(func(r *colly.Request) { + fmt.Println("详情地址:", r.URL) + }) + + detailColly.OnResponse(func(r *colly.Response) { + // log.Println("详情响应内容:", string(r.Body)) + productBean, err := ParseProductBean(r) + if err != nil { + fmt.Println("解析商品信息失败:", err) + return + } + productBeanList = append(productBeanList, productBean) + }) + + detailColly.OnError(func(r *colly.Response, err error) { + log.Printf("详情页请求失败: %v, URL: %s\n", err, r.Request.URL) + }) + + for _, i := range ids { + urlPath := "https://shop.yunmadian.com/shop/manage/goods/edit?goods_id=" + fmt.Sprint(i.ID) + detailColly.Visit(urlPath) + // log.Println("正在爬取详情页:", urlPath) + } + + detailColly.Wait() + + return productBeanList +} + +func ParseGoodsIDs(response *colly.Response) ([]*beans.GoodsID, error) { + // 1. 解析 JSON + var data = gjson.Parse(string(response.Body)) + + // 2. 检查 error 字段 + if data.Get("error").Int() != 0 && !data.Get("list").Exists() { + return nil, fmt.Errorf("数据不符合") + } + + jsonData := data.Get("list").String() + // fmt.Println(jsonData) + + // 3. 
转换为 ProductBean 对象 + var goodsIDs []*beans.GoodsID + err := jsoniter.UnmarshalFromString(jsonData, &goodsIDs) + if err != nil { + return nil, fmt.Errorf("转换为 goodsIDs 对象失败: %w", err) + } + + if len(goodsIDs) <= 0 { + return nil, fmt.Errorf("无数据") + } + + return goodsIDs, nil +} + +func ParseProductBean(response *colly.Response) (*beans.ProductBean, error) { + // 1. 解析 JSON + var data = gjson.Parse(string(response.Body)) + // err := json.Unmarshal(response.Body, &data) + + // fmt.Println("####", data) + + // 2. 检查 error 字段 + if data.Get("error").Int() != 0 && !data.Get("goods").Exists() { + return nil, fmt.Errorf("error 字段不为 0") + } + + jsonData := data.Get("goods").String() + // fmt.Println(data.Get("goods").Get("title").String()) + + // 3. 转换为 ProductBean 对象 + var productBean beans.ProductBean + err := jsoniter.UnmarshalFromString(jsonData, &productBean) + if err != nil { + return nil, fmt.Errorf("转换为 ProductBean 对象失败: %w", err) + } + + if !productBean.IsValidBean() { + return nil, fmt.Errorf("无效数据") + } + + return &productBean, nil +} + +func FilterDuplicateGoodsIDs(goodsIDs []*beans.GoodsID) []*beans.GoodsID { + seen := make(map[string]bool) + result := []*beans.GoodsID{} + + for _, goodsID := range goodsIDs { + if _, ok := seen[goodsID.ID]; !ok { + seen[goodsID.ID] = true + result = append(result, goodsID) + } + } + + return result +} diff --git a/src/colly_service/init.go b/src/colly_service/init.go new file mode 100644 index 0000000..81189df --- /dev/null +++ b/src/colly_service/init.go @@ -0,0 +1,7 @@ +package colly_service + +import "github.com/gocolly/colly" + +var ( + collyCollector *colly.Collector +) diff --git a/src/controller/hdy_controller.go b/src/controller/hdy_controller.go new file mode 100644 index 0000000..4127208 --- /dev/null +++ b/src/controller/hdy_controller.go @@ -0,0 +1,31 @@ +// 黑豆云商品抓取 +package controller + +import ( + "github.com/gofiber/fiber/v3" +) + +type Controller struct { +} + +func NewController() *Controller { + return 
&Controller{} +} + +func (c *Controller) GetData(ctx fiber.Ctx) error { + //https://shop.yunmadian.com/shop/manage/goods/edit?goods_id=14479 + productBeanList := hdyColly.StartCrawlProduct( + "eweishop-user=p2ameerhkfmafgbm9ai5qj38f6; shopId=355; warehose_id=0; storeId=; is_expired=1", + 121, + 187, + ) + for _, v := range productBeanList { + productService.SaveProduct(v) + } + + return ctx.JSON(len(productBeanList)) +} + +func (c *Controller) PostData(ctx fiber.Ctx) error { + return ctx.Status(fiber.StatusCreated).SendString("Data saved successfully") +} diff --git a/src/controller/init.go b/src/controller/init.go new file mode 100644 index 0000000..3a50020 --- /dev/null +++ b/src/controller/init.go @@ -0,0 +1,72 @@ +package controller + +import ( + "time" + + "fafa-crawler/src/colly_service" + "fafa-crawler/src/services" +) + +var ( + hdyColly = colly_service.NewHdyCollyService() + productService = services.NewProductService() +) + +// APIResponse 标准API响应结构体(符合国内大厂规范) +// 包含状态码、消息、数据和时间戳字段 +type APIResponse struct { + Code int `json:"code"` // 状态码: 0成功, 非0错误 + Msg string `json:"msg"` // 响应消息 + Data interface{} `json:"data"` // 业务数据 + Timestamp int64 `json:"timestamp"` // 服务器时间戳(秒级) +} + +// NewSuccessResponse 创建成功响应 +func NewSuccessResponse(data interface{}) *APIResponse { + return &APIResponse{ + Code: 0, + Msg: "success", + Data: data, + Timestamp: time.Now().Unix(), + } +} + +// NewErrorResponse 创建错误响应 +func NewErrorResponse(code int, msg string) *APIResponse { + return &APIResponse{ + Code: code, + Msg: msg, + Data: nil, + Timestamp: time.Now().Unix(), + } +} + +// Success 简化成功响应(仅数据) +func Success(data interface{}) *APIResponse { + return NewSuccessResponse(data) +} + +// SuccessMsg 简化成功响应(仅消息) +func SuccessMsg(msg string) *APIResponse { + return &APIResponse{ + Code: 0, + Msg: msg, + Data: nil, + Timestamp: time.Now().Unix(), + } +} + +// SuccessEmpty 无参数成功响应 +func SuccessEmpty() *APIResponse { + return Success(nil) +} + +// Error 简化错误响应(仅消息) +func 
Error(code int, msg string) *APIResponse { + return NewErrorResponse(code, msg) +} + +// ErrorEmpty 无参数错误响应 +func ErrorEmpty() *APIResponse { + return Error(1, "操作失败") +} diff --git a/src/controller/meituan_controller.go b/src/controller/meituan_controller.go new file mode 100644 index 0000000..ab27836 --- /dev/null +++ b/src/controller/meituan_controller.go @@ -0,0 +1,575 @@ +package controller + +import ( + "fmt" + "log" + "os" + "path/filepath" + "regexp" + "strconv" + "strings" + "sync" + "time" + + "github.com/gofiber/fiber/v3" + "github.com/xuri/excelize/v2" + + "fafa-crawler/src/beans" + "fafa-crawler/src/models" + "fafa-crawler/src/util" +) + +const ( + // 处理文件的总文件夹 + processFilePath = "/Users/panjunjie/Downloads/电商商品图片库/" +) + +var ( + resultMu sync.Mutex + wg sync.WaitGroup +) + +// SyncMeiTuanData 处理Excel文件上传请求,获取商品信息并处理图片 +func (c *Controller) SyncMeiTuanData(ctx fiber.Ctx) error { + start := time.Now() + // 直接解析请求参数(Fiber v3自动处理JSON/表单) + var params struct { + Folders []string `json:"folders" form:"folders"` // 图片源目录路径 + } + + if err := ctx.Bind().JSON(¶ms); err != nil { + return ctx.Status(fiber.StatusBadRequest).JSON(Error(fiber.StatusBadRequest, "无效的请求参数")) + } + if len(params.Folders) == 0 { + return ctx.Status(fiber.StatusBadRequest).JSON(Error(fiber.StatusBadRequest, "请提供图片源目录路径")) + } + + // 处理的图片保存路径 + saveImgRoot := "/Users/panjunjie/Downloads/meituan_img/" + + successCnt := 0 + + for _, folder := range params.Folders { + + path := processFilePath + folder + + // 读取并合并商品数据 + productList, err := ReadProductsFromExcel(path) + if err != nil { + return ctx.JSON(Error(1001, "读取Excel文件失败: "+err.Error())) + } + + productList2, err := ReadProductActivitiesFromExcel(path, productList) + if err != nil { + return ctx.JSON(Error(1002, "处理商品图片失败: "+err.Error())) + } + productList = append(productList, productList2...) 
+ + // 批量处理商品图片 + productBeanList, err := batchCopyProductImages(productList, path, saveImgRoot) + if err != nil { + return ctx.JSON(Error(1003, "批量处理商品图片失败: "+err.Error())) + } + + // 批量保存数据 + productService.SaveMeiTuanProducts(productBeanList) + + successCnt += len(productBeanList) + } + + elapsed := time.Since(start) + + return ctx.JSON(Success(fiber.Map{ + "success_count": successCnt, + "folder_count": len(params.Folders), + "elapsed_time": elapsed.String(), + })) +} + +// readExcelData 读取Excel文件数据并返回指定工作表的行数据 +// 参数: filePath - Excel文件路径, sheetIndex - 工作表索引 +// 返回: 行数据列表和可能的错误 +func readExcelData(filePath string, sheetIndex int) ([][]string, error) { + excelFile, err := excelize.OpenFile(filePath) + if err != nil { + return nil, fmt.Errorf("打开excel文件失败: %w", err) + } + defer excelFile.Close() + + sheetList := excelFile.GetSheetList() + if len(sheetList) <= sheetIndex { + return nil, fmt.Errorf("工作表索引 %d 不存在,文件中共有 %d 个工作表", sheetIndex, len(sheetList)) + } + + sheetName := sheetList[sheetIndex] + log.Printf("处理工作表: %q", sheetName) + + rows, err := excelFile.GetRows(sheetName) + if err != nil { + return nil, fmt.Errorf("获取工作表行数据失败: %w", err) + } + + return rows, nil +} + +// 常量定义 +const ( + headerRowCount = 3 // 表头行数 +) + +// MeiTuanController 美团数据控制器 +type MeiTuanController struct { +} + +func NewMeiTuanController() *MeiTuanController { + return &MeiTuanController{} +} + +// 读取商品 Excel 文件填充结构体 +// ReadProductsFromExcel 从Excel文件读取产品数据并转换为模型列表 +// 参数: filePath - Excel文件路径 +// 返回: 产品模型列表和可能的错误 +func ReadProductsFromExcel(filePath string) ([]*models.LibraryProduct, error) { + startTime := time.Now() + // 打开Excel文件 + excelFileName := filePath + "/美团闪购.xlsx" + rows, err := readExcelData(excelFileName, 0) // 读取第二个工作表(索引1) + if err != nil { + return nil, fmt.Errorf("读取Excel文件 %s失败: %w", excelFileName, err) + } + + // 预分配空间,排除标题行(确保容量非负) + capacity := len(rows) - headerRowCount + if capacity < 0 { + capacity = 0 + } + productList := make([]*models.LibraryProduct, 0, 
capacity) + + titleClear := util.NewTitleCleaner() + + // 遍历每一行数据(跳过前3行标题和备注行) + for rowIdx, row := range rows { + // 跳过标题行和备注行(前3行) + if rowIdx < headerRowCount { + continue + } + + // 跳过空行 + if isEmptyRow(row) { + log.Printf("跳过空行: 第%d行", rowIdx+1) + continue + } + + // 创建产品模型 + product := &models.LibraryProduct{} + + // 解析单元格数据 + if err := parseRowToProduct(row, product, titleClear); err != nil { + log.Printf("解析行数据失败(第%d行): %v, 已跳过", rowIdx+1, err) + continue + } + + // 验证产品名称 + if product.Name == "" { + log.Printf("产品名称为空,跳过第%d行", rowIdx+1) + continue + } + + product.Source = "meituan" + productList = append(productList, product) + } + + elapsed := time.Since(startTime) + log.Printf("excel解析完成,共处理 %d 个产品,耗时 %v", len(productList), elapsed) + return productList, nil +} + +// isEmptyRow 判断是否为空行 +func isEmptyRow(row []string) bool { + for _, cell := range row { + if strings.TrimSpace(cell) != "" { + return false + } + } + return true +} + +// parseRowToProduct 将Excel行数据解析到产品模型 +func parseRowToProduct(row []string, product *models.LibraryProduct, titleClear *util.TitleCleaner) error { + // 列索引到处理函数的映射,仅包含需要处理的列 + cellHandlers := map[int]func(string, *models.LibraryProduct) error{ + 0: func(v string, p *models.LibraryProduct) error { // 条形码 + p.Barcode = emptyIf(v, "无条形码") + return nil + }, + 1: func(v string, p *models.LibraryProduct) error { // 产品名称 + p.Title = v + p.Name = titleClear.ExtractProductName(v) + p.Sname = p.Name + p.Thumb = titleClear.CleanForFileName(v) + return nil + }, + 2: func(v string, p *models.LibraryProduct) error { // 规格 + // 处理规格字符串:替换分隔符并提取关键规格信息 + p.Spec = strings.ReplaceAll(v, "#", ";") + newSpec := titleClear.ExtractSpec(v) + if p.Spec == "" && newSpec != "" { + p.Spec = newSpec + } + return nil + }, + 3: func(v string, p *models.LibraryProduct) error { // 价格 + if v == "" { + p.Price = 0 + return nil + } + price, err := strconv.ParseFloat(v, 64) + if err != nil { + // return fmt.Errorf("第%d行价格格式错误: %s", rowNumber, v) + price = 0 + } + 
p.Price = price + return nil + }, + 8: func(v string, p *models.LibraryProduct) error { // 分类 + p.Category = v + return nil + }, + 9: func(v string, p *models.LibraryProduct) error { // 品牌 + p.Brand = emptyIf(v, "无品牌") + return nil + }, + 10: func(v string, p *models.LibraryProduct) error { // 产地 + p.MadeIn = v + return nil + }, + 11: func(v string, p *models.LibraryProduct) error { // 重量 + p.Weight = v + return nil + }, + 12: func(v string, p *models.LibraryProduct) error { // 重量单位 + p.WeightUnit = v + return nil + }, + 14: func(v string, p *models.LibraryProduct) error { // 一级分类 + p.Category1St = v + return nil + }, + 15: func(v string, p *models.LibraryProduct) error { // 二级分类 + p.Category2Nd = v + return nil + }, + 19: func(v string, p *models.LibraryProduct) error { // 卖点 + p.SellingPoint = v + return nil + }, + 21: func(v string, p *models.LibraryProduct) error { // 介绍 + p.Intro = v + return nil + }, + } + + for i, cellValue := range row { + if handler, ok := cellHandlers[i]; ok { + if err := handler(strings.TrimSpace(cellValue), product); err != nil { + return err + } + } + } + return nil +} + +// parseRowToProductActivity 将Excel行数据解析到产品模型 +func parseRowToProductActivity(row []string, product *models.LibraryProduct, titleClear *util.TitleCleaner) error { + // 列索引到处理函数的映射,仅包含需要处理的列 + cellHandlers := map[int]func(string, *models.LibraryProduct) error{ + 1: func(v string, p *models.LibraryProduct) error { // 条形码 + p.Barcode = emptyIf(v, "无条形码") + return nil + + }, + 3: func(v string, p *models.LibraryProduct) error { // 产品名称 + p.Title = v + p.Name = titleClear.ExtractProductName(v) + p.Sname = p.Name + p.Thumb = titleClear.CleanForFileName(v) + + if p.Spec == "" { + p.Spec = titleClear.ExtractSpec(v) + } + + return nil + }, + 5: func(v string, p *models.LibraryProduct) error { // 价格 + price, err := strconv.ParseFloat(v, 64) + if err != nil { + price = 0 + } + p.Price = price + return nil + }, + } + + for i, cellValue := range row { + if handler, ok := 
cellHandlers[i]; ok { + if err := handler(strings.TrimSpace(cellValue), product); err != nil { + return err + } + } + } + return nil +} + +func emptyIf(value, match string) string { + if value == match { + return "" + } + return value +} + +// ReadProductActivitiesFromExcel 从Excel文件读取产品活动数据 +// 参数: filePath - Excel文件路径, oldProductList - 已有产品列表用于去重 +// 返回: 产品模型列表和可能的错误 +func ReadProductActivitiesFromExcel(filePath string, oldProductList []*models.LibraryProduct) ([]*models.LibraryProduct, error) { + startTime := time.Now() + // 创建产品索引映射用于快速去重 + productIndex := make(map[string]bool) + for _, p := range oldProductList { + productIndex[p.Barcode] = true + productIndex[p.Name] = true + } + + // 读取Excel数据 + excelFileName := filePath + "/美团闪购折扣.xlsx" + rows, err := readExcelData(excelFileName, 1) // 读取第二个工作表(索引1) + if err != nil { + return nil, fmt.Errorf("读取Excel文件 %s失败: %w", excelFileName, err) + } + + titleClear := util.NewTitleCleaner() + + // 预分配空间,排除标题行(确保容量非负) + capacity := len(rows) - headerRowCount + if capacity < 0 { + capacity = 0 + } + productList := make([]*models.LibraryProduct, 0, capacity) + + // 遍历行数据并解析 + for rowIdx, row := range rows { + if rowIdx < headerRowCount || isEmptyRow(row) { + continue + } + + product := &models.LibraryProduct{} + if err := parseRowToProductActivity(row, product, titleClear); err != nil { + log.Printf("解析行数据失败(第%d行): %v, 已跳过", rowIdx+1, err) + continue + } + + // 验证并检查重复 + if product.Name == "" { + //log.Printf("产品名称为空,跳过第%d行", rowIdx+1) + continue + } + + if productIndex[product.Barcode] || productIndex[product.Name] { + // log.Printf("产品已存在(条形码: %s, 名称: %s),跳过第%d行", product.Barcode, product.Name, rowIdx+1) + continue + } + + productList = append(productList, product) + productIndex[product.Barcode] = true + productIndex[product.Name] = true + } + + elapsed := time.Since(startTime) + log.Printf("活动excel解析完成,共处理 %d 个产品,耗时 %v", len(productList), elapsed) + + return productList, nil +} + +// batchCopyProductImages 
批量复制商品图片,并将匹配的图片路径赋值给商品信息 +// 参数: +// +// productList - 商品列表 +// srcPath - 图片源目录路径 +// saveImgRoot - 图片保存目录路径 +// +// 返回: +// +// 包含商品和图片信息的列表,以及可能的错误 +func batchCopyProductImages(productList []*models.LibraryProduct, srcPath, saveImgRoot string) ([]*beans.ProductMeiTuanBean, error) { + startTime := time.Now() + result := make([]*beans.ProductMeiTuanBean, 0, len(productList)) + if len(productList) == 0 { + return result, nil + } + + // 1. 验证图片源目录 + if _, err := os.Stat(srcPath); os.IsNotExist(err) { + return nil, fmt.Errorf("目录不存在: %w", err) + } + + // 2. 合并分散的图片文件夹 + processor := util.NewImageProcessor(srcPath) + mergePath, err := processor.Merge() + if err != nil { + return nil, fmt.Errorf("图片合并失败: %w", err) + } + + // 3. 构建图片索引 (优化查询性能: O(N+M)替代O(N*M)) + imageFiles, err := os.ReadDir(mergePath) + if err != nil { + return nil, fmt.Errorf("读取图片目录失败: %w", err) + } + + // 图片索引: key=处理后的标题前缀, value=匹配的图片列表 + imageIndex := make(map[string][]string) + for _, file := range imageFiles { + if file.IsDir() { + continue + } + + imageName := file.Name() + imageNameExt := filepath.Ext(imageName) + imageNameWithoutExt := strings.TrimSuffix(imageName, imageNameExt) + + // 移除末尾的-数字后缀 (如"abc-1.jpg" → "abc") + processedName := regexp.MustCompile(`-\d+$`).ReplaceAllString(imageNameWithoutExt, "") + // 限制前缀长度,避免索引键过长 + prefix := processedName + if len(prefix) > 250 { + prefix = prefix[:250] + } + + imageIndex[prefix] = append(imageIndex[prefix], imageName) + } + + // log.Printf("图片字典长度:%v", imageIndex) + + // 4. 准备保存目录 + nowFolder := time.Now().Format("20060102") + saveRoot := filepath.Join(saveImgRoot, nowFolder) + virtualSaveRoot := filepath.Join("/", "media", "images", "goods_library", nowFolder) + if err := os.MkdirAll(saveRoot, 0755); err != nil { + return nil, fmt.Errorf("创建目录失败: %w", err) + } + + // 5. 
并发处理商品图片 + for _, product := range productList { + if product.Title == "" { + continue + } + + wg.Add(1) + + go func(p *models.LibraryProduct) { + defer wg.Done() + + // 5.1 查找匹配的图片: 优先条形码匹配,再标题匹配 + matchedImages := imageIndex[p.Barcode] + + // p.Thumb 临时符合规范的图片名称 + matchKey := p.Thumb + if len(matchKey) > 250 { + matchKey = matchKey[:250] + } + matchedImages = append(matchedImages, imageIndex[matchKey]...) + + // 5.2 去重处理: 使用map确保图片唯一 + uniqueImages := make([]string, 0, len(matchedImages)) + seenImages := make(map[string]bool) + for _, img := range matchedImages { + if !seenImages[img] { + seenImages[img] = true + uniqueImages = append(uniqueImages, img) + } + } + matchedImages = uniqueImages + + // 如果商品匹配不到图片,则跳过 + if len(matchedImages) == 0 { + return + } + + // 5.3 确定主图: 优先匹配条形码/标题,无则取第一张 + var mainImage string + for _, imgName := range matchedImages { + fileExt := filepath.Ext(imgName) + nameWithoutExt := util.CleanProductForFilePath(strings.TrimSuffix(imgName, fileExt)) + if mainImage == "" && (nameWithoutExt == p.Barcode || nameWithoutExt == matchKey) { + mainImage = imgName + break + } + } + + if mainImage == "" { + mainImage = matchedImages[0] + } + + // 5.5 并发复制图片: 并行处理图片复制并生成ProductImage + var ( + productImages []*models.LibraryProductImage + copyWg sync.WaitGroup + copyMu sync.Mutex + ) + + thumbNewName := "" + + for idx, imgName := range matchedImages { + copyWg.Add(1) + go func(i int, img string) { + defer copyWg.Done() + fileExt := filepath.Ext(img) + imgNewName := util.GenerateUUID2FileName(fileExt, false) + absSavePath := filepath.Join(saveRoot, imgNewName) + if err := util.CopyFile(filepath.Join(mergePath, img), absSavePath); err != nil { + log.Printf("图片复制失败 %s: %v", img, err) + return + } + + var isMainImg int32 = 2 + if img == mainImage { + isMainImg = 1 + } + if isMainImg == 1 { + thumbNewName = imgNewName + } + + // 创建图片信息并添加到列表 + image := &models.LibraryProductImage{ + ProductID: p.ID, + ImageURL: fmt.Sprintf("%s/%s", virtualSaveRoot, 
imgNewName), + IsMain: isMainImg, + Seq: int32(i + 1), + } + + copyMu.Lock() + productImages = append(productImages, image) + copyMu.Unlock() + }(idx, imgName) + } + + copyWg.Wait() + + // 5.4 处理主图URL: 生成随机文件名并更新商品缩略图 + p.Thumb = fmt.Sprintf("%s/%s", virtualSaveRoot, thumbNewName) + + // 5.6 收集结果: 加锁保护结果切片 + resultMu.Lock() + result = append(result, &beans.ProductMeiTuanBean{ + Product: p, + ImageList: productImages, + }) + resultMu.Unlock() + + }(product) + } + wg.Wait() + + // 6. 输出处理结果: 记录耗时和处理数量 + elapsed := time.Since(startTime) + log.Printf("填充数据,并上传图片,实际共处理 %d 个产品,耗时 %v", len(result), elapsed) + return result, nil +} diff --git a/src/controller/sx_goods_controller.go b/src/controller/sx_goods_controller.go new file mode 100644 index 0000000..bfeafe0 --- /dev/null +++ b/src/controller/sx_goods_controller.go @@ -0,0 +1,71 @@ +package controller + +import ( + "fafa-crawler/src/services" + "log" + "time" + + "github.com/gofiber/fiber/v3" +) + +// SyncSiXunGoodsData 处理思迅爬取的数据,上传到小发同城图片库 +func (c *Controller) SyncSiXunGoodsData(ctx fiber.Ctx) error { + start := time.Now() + // 直接解析请求参数(Fiber v3自动处理JSON/表单) + var params struct { + Folders []string `json:"folders" form:"folders"` // 图片源目录路径 + } + + if err := ctx.Bind().JSON(¶ms); err != nil { + return ctx.Status(fiber.StatusBadRequest).JSON(Error(fiber.StatusBadRequest, "无效的请求参数")) + } + if len(params.Folders) == 0 { + return ctx.Status(fiber.StatusBadRequest).JSON(Error(fiber.StatusBadRequest, "请提供图片源目录路径")) + } + + sxGoodsService := services.NewSxGoodsService() + + successCnt := 0 + totalSrcDataCnt := int64(0) + totalFilteredDataCnt := int64(0) + totalSavedDataCnt := int64(0) + totalFilteredImgCnt := int64(0) + + for _, folder := range params.Folders { + path := processFilePath + folder + + // 处理思迅商品数据 + srcDataCnt, filteredDataCnt, savedDataCnt, filteredImgCnt := sxGoodsService.ProcessSiXunGoodsData(path) + + // 累计统计数据 + totalSrcDataCnt += srcDataCnt + totalFilteredDataCnt += filteredDataCnt + totalSavedDataCnt 
+= savedDataCnt + totalFilteredImgCnt += filteredImgCnt + + // 如果成功保存了数据,则增加成功计数 + if savedDataCnt > 0 { + successCnt++ + } + + // 记录每个文件夹的处理日志 + log.Printf("文件夹 %s 处理完成: 源数据%d条,过滤后数据%d条,保存成功%d条,有效图片%d张", + folder, srcDataCnt, filteredDataCnt, savedDataCnt, filteredImgCnt) + } + + elapsed := time.Since(start) + + // 记录总体处理日志 + log.Printf("思迅商品数据同步完成: 处理%d个文件夹,源数据%d条,过滤后数据%d条,保存成功%d条,有效图片%d张,耗时%s", + len(params.Folders), totalSrcDataCnt, totalFilteredDataCnt, totalSavedDataCnt, totalFilteredImgCnt, elapsed.String()) + + return ctx.JSON(Success(fiber.Map{ + "success_count": successCnt, + "folder_count": len(params.Folders), + "elapsed_time": elapsed.String(), + "total_src_data": totalSrcDataCnt, + "total_filtered_data": totalFilteredDataCnt, + "total_saved_data": totalSavedDataCnt, + "total_filtered_img": totalFilteredImgCnt, + })) +} diff --git a/src/dbutil/mysql_gorm.go b/src/dbutil/mysql_gorm.go new file mode 100644 index 0000000..3123100 --- /dev/null +++ b/src/dbutil/mysql_gorm.go @@ -0,0 +1,88 @@ +package dbutil + +import ( + "fmt" + "log" + "os" + "time" + + "github.com/spf13/viper" + "gorm.io/driver/mysql" + "gorm.io/gorm" + "gorm.io/gorm/logger" +) + +var MySQLDB *gorm.DB + +func InitMySQLDB() { + config := struct { + MySQL struct { + Host string `mapstructure:"host"` + Port int `mapstructure:"port"` + User string `mapstructure:"user"` + Password string `mapstructure:"password"` + Name string `mapstructure:"name"` + MaxIdleConns int `mapstructure:"max_idle_conns"` + MaxOpenConns int `mapstructure:"max_open_conns"` + ConnMaxLifetime time.Duration `mapstructure:"conn_max_lifetime"` + ConnMaxIdleTime time.Duration `mapstructure:"conn_max_idle_time"` + } `mapstructure:"mysql"` + }{} + + viper.SetConfigFile("./config/config.toml") + viper.SetConfigType("toml") + + err := viper.ReadInConfig() + if err != nil { + panic(fmt.Errorf("fatal error config file: %s", err)) + } + + err = viper.Unmarshal(&config) + if err != nil { + panic(fmt.Errorf("fatal error config 
file: %s", err)) + } + + dsn := fmt.Sprintf("%s:%s@tcp(%s:%d)/%s?charset=utf8mb4&parseTime=True&loc=Local", + config.MySQL.User, + config.MySQL.Password, + config.MySQL.Host, + config.MySQL.Port, + config.MySQL.Name, + ) + + newLogger := logger.New( + log.New(os.Stdout, "\r\n", log.LstdFlags), // io writer + logger.Config{ + SlowThreshold: time.Second, // Slow SQL threshold + LogLevel: logger.Error, // Log level + Colorful: true, // Enable color + }, + ) + + MySQLDB, err = gorm.Open(mysql.Open(dsn), &gorm.Config{ + Logger: newLogger, + }) + if err != nil { + panic("连接数据库失败: " + err.Error()) + } + + // MySQLDB.Debug() + + sqlDB, err := MySQLDB.DB() + if err != nil { + panic("获取数据库连接失败: " + err.Error()) + } + + // SetMaxIdleConns 设置空闲连接池中连接的最大数量 + sqlDB.SetMaxIdleConns(config.MySQL.MaxIdleConns) + + // SetMaxOpenConns 设置打开数据库连接的最大数量。 + sqlDB.SetMaxOpenConns(config.MySQL.MaxOpenConns) + + // SetConnMaxLifetime 设置了连接可复用的最大时间。 + sqlDB.SetConnMaxLifetime(config.MySQL.ConnMaxLifetime * time.Hour) + + // SetConnMaxIdleTime 设置连接最大空闲时间 + sqlDB.SetConnMaxIdleTime(config.MySQL.ConnMaxIdleTime * time.Minute) + +} diff --git a/src/mapper/gen.go b/src/mapper/gen.go new file mode 100644 index 0000000..b6867ad --- /dev/null +++ b/src/mapper/gen.go @@ -0,0 +1,111 @@ +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. + +package mapper + +import ( + "context" + "database/sql" + + "gorm.io/gorm" + + "gorm.io/gen" + + "gorm.io/plugin/dbresolver" +) + +var ( + Q = new(Query) + LibraryProduct *libraryProduct + LibraryProductImage *libraryProductImage +) + +func SetDefault(db *gorm.DB, opts ...gen.DOOption) { + *Q = *Use(db, opts...) 
+ LibraryProduct = &Q.LibraryProduct + LibraryProductImage = &Q.LibraryProductImage +} + +func Use(db *gorm.DB, opts ...gen.DOOption) *Query { + return &Query{ + db: db, + LibraryProduct: newLibraryProduct(db, opts...), + LibraryProductImage: newLibraryProductImage(db, opts...), + } +} + +type Query struct { + db *gorm.DB + + LibraryProduct libraryProduct + LibraryProductImage libraryProductImage +} + +func (q *Query) Available() bool { return q.db != nil } + +func (q *Query) clone(db *gorm.DB) *Query { + return &Query{ + db: db, + LibraryProduct: q.LibraryProduct.clone(db), + LibraryProductImage: q.LibraryProductImage.clone(db), + } +} + +func (q *Query) ReadDB() *Query { + return q.ReplaceDB(q.db.Clauses(dbresolver.Read)) +} + +func (q *Query) WriteDB() *Query { + return q.ReplaceDB(q.db.Clauses(dbresolver.Write)) +} + +func (q *Query) ReplaceDB(db *gorm.DB) *Query { + return &Query{ + db: db, + LibraryProduct: q.LibraryProduct.replaceDB(db), + LibraryProductImage: q.LibraryProductImage.replaceDB(db), + } +} + +type queryCtx struct { + LibraryProduct ILibraryProductDo + LibraryProductImage ILibraryProductImageDo +} + +func (q *Query) WithContext(ctx context.Context) *queryCtx { + return &queryCtx{ + LibraryProduct: q.LibraryProduct.WithContext(ctx), + LibraryProductImage: q.LibraryProductImage.WithContext(ctx), + } +} + +func (q *Query) Transaction(fc func(tx *Query) error, opts ...*sql.TxOptions) error { + return q.db.Transaction(func(tx *gorm.DB) error { return fc(q.clone(tx)) }, opts...) +} + +func (q *Query) Begin(opts ...*sql.TxOptions) *QueryTx { + tx := q.db.Begin(opts...) 
+ return &QueryTx{Query: q.clone(tx), Error: tx.Error} +} + +type QueryTx struct { + *Query + Error error +} + +func (q *QueryTx) Commit() error { + return q.db.Commit().Error +} + +func (q *QueryTx) Rollback() error { + return q.db.Rollback().Error +} + +func (q *QueryTx) SavePoint(name string) error { + return q.db.SavePoint(name).Error +} + +func (q *QueryTx) RollbackTo(name string) error { + return q.db.RollbackTo(name).Error +} diff --git a/src/mapper/library_product.gen.go b/src/mapper/library_product.gen.go new file mode 100644 index 0000000..d9bdc50 --- /dev/null +++ b/src/mapper/library_product.gen.go @@ -0,0 +1,472 @@ +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. + +package mapper + +import ( + "context" + "database/sql" + + "gorm.io/gorm" + "gorm.io/gorm/clause" + "gorm.io/gorm/schema" + + "gorm.io/gen" + "gorm.io/gen/field" + + "gorm.io/plugin/dbresolver" + + "fafa-crawler/src/models" +) + +func newLibraryProduct(db *gorm.DB, opts ...gen.DOOption) libraryProduct { + _libraryProduct := libraryProduct{} + + _libraryProduct.libraryProductDo.UseDB(db, opts...) 
+ _libraryProduct.libraryProductDo.UseModel(&models.LibraryProduct{}) + + tableName := _libraryProduct.libraryProductDo.TableName() + _libraryProduct.ALL = field.NewAsterisk(tableName) + _libraryProduct.ID = field.NewUint64(tableName, "id") + _libraryProduct.Sname = field.NewString(tableName, "sname") + _libraryProduct.Name = field.NewString(tableName, "name") + _libraryProduct.Title = field.NewString(tableName, "title") + _libraryProduct.Barcode = field.NewString(tableName, "barcode") + _libraryProduct.Category1St = field.NewString(tableName, "category_1st") + _libraryProduct.Category2Nd = field.NewString(tableName, "category_2nd") + _libraryProduct.Category = field.NewString(tableName, "category") + _libraryProduct.Price = field.NewFloat64(tableName, "price") + _libraryProduct.Keywords = field.NewString(tableName, "keywords") + _libraryProduct.Thumb = field.NewString(tableName, "thumb") + _libraryProduct.Brand = field.NewString(tableName, "brand") + _libraryProduct.Spec = field.NewString(tableName, "spec") + _libraryProduct.MadeIn = field.NewString(tableName, "made_in") + _libraryProduct.Weight = field.NewString(tableName, "weight") + _libraryProduct.WeightUnit = field.NewString(tableName, "weight_unit") + _libraryProduct.SellingPoint = field.NewString(tableName, "selling_point") + _libraryProduct.Intro = field.NewString(tableName, "intro") + _libraryProduct.Seq = field.NewInt32(tableName, "seq") + _libraryProduct.Source = field.NewString(tableName, "source") + _libraryProduct.Status = field.NewInt32(tableName, "status") + _libraryProduct.CreatedAt = field.NewTime(tableName, "created_at") + _libraryProduct.UpdatedAt = field.NewTime(tableName, "updated_at") + + _libraryProduct.fillFieldMap() + + return _libraryProduct +} + +// libraryProduct 商品库商品表 +type libraryProduct struct { + libraryProductDo + + ALL field.Asterisk + ID field.Uint64 // 商品ID + Sname field.String // 商品简称,作为匹配关键字 + Name field.String // 商品名 + Title field.String // 商品标题 + Barcode field.String // 
条形码/Barcode + Category1St field.String // 第一级分类 + Category2Nd field.String // 第二级分类 + Category field.String // 第3级分类 + Price field.Float64 // 商品原价 + Keywords field.String // 商品关键字 + Thumb field.String // 封面图主图 + Brand field.String // 品牌 + Spec field.String // 商品规格,多个使用英文分号分割 + MadeIn field.String // 产地 + Weight field.String // 重量值 + WeightUnit field.String // 重量单位 + SellingPoint field.String // 商品卖点 + Intro field.String // 商品介绍 + Seq field.Int32 // 排序值,越小越前面 + Source field.String // 来源ID + Status field.Int32 // 状态 1-有效 2-无效 + CreatedAt field.Time // 创建时间 + UpdatedAt field.Time // 更新时间 + + fieldMap map[string]field.Expr +} + +func (l libraryProduct) Table(newTableName string) *libraryProduct { + l.libraryProductDo.UseTable(newTableName) + return l.updateTableName(newTableName) +} + +func (l libraryProduct) As(alias string) *libraryProduct { + l.libraryProductDo.DO = *(l.libraryProductDo.As(alias).(*gen.DO)) + return l.updateTableName(alias) +} + +func (l *libraryProduct) updateTableName(table string) *libraryProduct { + l.ALL = field.NewAsterisk(table) + l.ID = field.NewUint64(table, "id") + l.Sname = field.NewString(table, "sname") + l.Name = field.NewString(table, "name") + l.Title = field.NewString(table, "title") + l.Barcode = field.NewString(table, "barcode") + l.Category1St = field.NewString(table, "category_1st") + l.Category2Nd = field.NewString(table, "category_2nd") + l.Category = field.NewString(table, "category") + l.Price = field.NewFloat64(table, "price") + l.Keywords = field.NewString(table, "keywords") + l.Thumb = field.NewString(table, "thumb") + l.Brand = field.NewString(table, "brand") + l.Spec = field.NewString(table, "spec") + l.MadeIn = field.NewString(table, "made_in") + l.Weight = field.NewString(table, "weight") + l.WeightUnit = field.NewString(table, "weight_unit") + l.SellingPoint = field.NewString(table, "selling_point") + l.Intro = field.NewString(table, "intro") + l.Seq = field.NewInt32(table, "seq") + l.Source = field.NewString(table, 
"source") + l.Status = field.NewInt32(table, "status") + l.CreatedAt = field.NewTime(table, "created_at") + l.UpdatedAt = field.NewTime(table, "updated_at") + + l.fillFieldMap() + + return l +} + +func (l *libraryProduct) GetFieldByName(fieldName string) (field.OrderExpr, bool) { + _f, ok := l.fieldMap[fieldName] + if !ok || _f == nil { + return nil, false + } + _oe, ok := _f.(field.OrderExpr) + return _oe, ok +} + +func (l *libraryProduct) fillFieldMap() { + l.fieldMap = make(map[string]field.Expr, 23) + l.fieldMap["id"] = l.ID + l.fieldMap["sname"] = l.Sname + l.fieldMap["name"] = l.Name + l.fieldMap["title"] = l.Title + l.fieldMap["barcode"] = l.Barcode + l.fieldMap["category_1st"] = l.Category1St + l.fieldMap["category_2nd"] = l.Category2Nd + l.fieldMap["category"] = l.Category + l.fieldMap["price"] = l.Price + l.fieldMap["keywords"] = l.Keywords + l.fieldMap["thumb"] = l.Thumb + l.fieldMap["brand"] = l.Brand + l.fieldMap["spec"] = l.Spec + l.fieldMap["made_in"] = l.MadeIn + l.fieldMap["weight"] = l.Weight + l.fieldMap["weight_unit"] = l.WeightUnit + l.fieldMap["selling_point"] = l.SellingPoint + l.fieldMap["intro"] = l.Intro + l.fieldMap["seq"] = l.Seq + l.fieldMap["source"] = l.Source + l.fieldMap["status"] = l.Status + l.fieldMap["created_at"] = l.CreatedAt + l.fieldMap["updated_at"] = l.UpdatedAt +} + +func (l libraryProduct) clone(db *gorm.DB) libraryProduct { + l.libraryProductDo.ReplaceConnPool(db.Statement.ConnPool) + return l +} + +func (l libraryProduct) replaceDB(db *gorm.DB) libraryProduct { + l.libraryProductDo.ReplaceDB(db) + return l +} + +type libraryProductDo struct{ gen.DO } + +type ILibraryProductDo interface { + gen.SubQuery + Debug() ILibraryProductDo + WithContext(ctx context.Context) ILibraryProductDo + WithResult(fc func(tx gen.Dao)) gen.ResultInfo + ReplaceDB(db *gorm.DB) + ReadDB() ILibraryProductDo + WriteDB() ILibraryProductDo + As(alias string) gen.Dao + Session(config *gorm.Session) ILibraryProductDo + Columns(cols ...field.Expr) 
gen.Columns + Clauses(conds ...clause.Expression) ILibraryProductDo + Not(conds ...gen.Condition) ILibraryProductDo + Or(conds ...gen.Condition) ILibraryProductDo + Select(conds ...field.Expr) ILibraryProductDo + Where(conds ...gen.Condition) ILibraryProductDo + Order(conds ...field.Expr) ILibraryProductDo + Distinct(cols ...field.Expr) ILibraryProductDo + Omit(cols ...field.Expr) ILibraryProductDo + Join(table schema.Tabler, on ...field.Expr) ILibraryProductDo + LeftJoin(table schema.Tabler, on ...field.Expr) ILibraryProductDo + RightJoin(table schema.Tabler, on ...field.Expr) ILibraryProductDo + Group(cols ...field.Expr) ILibraryProductDo + Having(conds ...gen.Condition) ILibraryProductDo + Limit(limit int) ILibraryProductDo + Offset(offset int) ILibraryProductDo + Count() (count int64, err error) + Scopes(funcs ...func(gen.Dao) gen.Dao) ILibraryProductDo + Unscoped() ILibraryProductDo + Create(values ...*models.LibraryProduct) error + CreateInBatches(values []*models.LibraryProduct, batchSize int) error + Save(values ...*models.LibraryProduct) error + First() (*models.LibraryProduct, error) + Take() (*models.LibraryProduct, error) + Last() (*models.LibraryProduct, error) + Find() ([]*models.LibraryProduct, error) + FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*models.LibraryProduct, err error) + FindInBatches(result *[]*models.LibraryProduct, batchSize int, fc func(tx gen.Dao, batch int) error) error + Pluck(column field.Expr, dest interface{}) error + Delete(...*models.LibraryProduct) (info gen.ResultInfo, err error) + Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error) + UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error) + Updates(value interface{}) (info gen.ResultInfo, err error) + UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error) + UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error) + UpdateColumns(value 
interface{}) (info gen.ResultInfo, err error) + UpdateFrom(q gen.SubQuery) gen.Dao + Attrs(attrs ...field.AssignExpr) ILibraryProductDo + Assign(attrs ...field.AssignExpr) ILibraryProductDo + Joins(fields ...field.RelationField) ILibraryProductDo + Preload(fields ...field.RelationField) ILibraryProductDo + FirstOrInit() (*models.LibraryProduct, error) + FirstOrCreate() (*models.LibraryProduct, error) + FindByPage(offset int, limit int) (result []*models.LibraryProduct, count int64, err error) + ScanByPage(result interface{}, offset int, limit int) (count int64, err error) + Rows() (*sql.Rows, error) + Row() *sql.Row + Scan(result interface{}) (err error) + Returning(value interface{}, columns ...string) ILibraryProductDo + UnderlyingDB() *gorm.DB + schema.Tabler +} + +func (l libraryProductDo) Debug() ILibraryProductDo { + return l.withDO(l.DO.Debug()) +} + +func (l libraryProductDo) WithContext(ctx context.Context) ILibraryProductDo { + return l.withDO(l.DO.WithContext(ctx)) +} + +func (l libraryProductDo) ReadDB() ILibraryProductDo { + return l.Clauses(dbresolver.Read) +} + +func (l libraryProductDo) WriteDB() ILibraryProductDo { + return l.Clauses(dbresolver.Write) +} + +func (l libraryProductDo) Session(config *gorm.Session) ILibraryProductDo { + return l.withDO(l.DO.Session(config)) +} + +func (l libraryProductDo) Clauses(conds ...clause.Expression) ILibraryProductDo { + return l.withDO(l.DO.Clauses(conds...)) +} + +func (l libraryProductDo) Returning(value interface{}, columns ...string) ILibraryProductDo { + return l.withDO(l.DO.Returning(value, columns...)) +} + +func (l libraryProductDo) Not(conds ...gen.Condition) ILibraryProductDo { + return l.withDO(l.DO.Not(conds...)) +} + +func (l libraryProductDo) Or(conds ...gen.Condition) ILibraryProductDo { + return l.withDO(l.DO.Or(conds...)) +} + +func (l libraryProductDo) Select(conds ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.Select(conds...)) +} + +func (l libraryProductDo) Where(conds 
...gen.Condition) ILibraryProductDo { + return l.withDO(l.DO.Where(conds...)) +} + +func (l libraryProductDo) Order(conds ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.Order(conds...)) +} + +func (l libraryProductDo) Distinct(cols ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.Distinct(cols...)) +} + +func (l libraryProductDo) Omit(cols ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.Omit(cols...)) +} + +func (l libraryProductDo) Join(table schema.Tabler, on ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.Join(table, on...)) +} + +func (l libraryProductDo) LeftJoin(table schema.Tabler, on ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.LeftJoin(table, on...)) +} + +func (l libraryProductDo) RightJoin(table schema.Tabler, on ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.RightJoin(table, on...)) +} + +func (l libraryProductDo) Group(cols ...field.Expr) ILibraryProductDo { + return l.withDO(l.DO.Group(cols...)) +} + +func (l libraryProductDo) Having(conds ...gen.Condition) ILibraryProductDo { + return l.withDO(l.DO.Having(conds...)) +} + +func (l libraryProductDo) Limit(limit int) ILibraryProductDo { + return l.withDO(l.DO.Limit(limit)) +} + +func (l libraryProductDo) Offset(offset int) ILibraryProductDo { + return l.withDO(l.DO.Offset(offset)) +} + +func (l libraryProductDo) Scopes(funcs ...func(gen.Dao) gen.Dao) ILibraryProductDo { + return l.withDO(l.DO.Scopes(funcs...)) +} + +func (l libraryProductDo) Unscoped() ILibraryProductDo { + return l.withDO(l.DO.Unscoped()) +} + +func (l libraryProductDo) Create(values ...*models.LibraryProduct) error { + if len(values) == 0 { + return nil + } + return l.DO.Create(values) +} + +func (l libraryProductDo) CreateInBatches(values []*models.LibraryProduct, batchSize int) error { + return l.DO.CreateInBatches(values, batchSize) +} + +// Save : !!! 
underlying implementation is different with GORM +// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values) +func (l libraryProductDo) Save(values ...*models.LibraryProduct) error { + if len(values) == 0 { + return nil + } + return l.DO.Save(values) +} + +func (l libraryProductDo) First() (*models.LibraryProduct, error) { + if result, err := l.DO.First(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProduct), nil + } +} + +func (l libraryProductDo) Take() (*models.LibraryProduct, error) { + if result, err := l.DO.Take(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProduct), nil + } +} + +func (l libraryProductDo) Last() (*models.LibraryProduct, error) { + if result, err := l.DO.Last(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProduct), nil + } +} + +func (l libraryProductDo) Find() ([]*models.LibraryProduct, error) { + result, err := l.DO.Find() + return result.([]*models.LibraryProduct), err +} + +func (l libraryProductDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*models.LibraryProduct, err error) { + buf := make([]*models.LibraryProduct, 0, batchSize) + err = l.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error { + defer func() { results = append(results, buf...) 
}() + return fc(tx, batch) + }) + return results, err +} + +func (l libraryProductDo) FindInBatches(result *[]*models.LibraryProduct, batchSize int, fc func(tx gen.Dao, batch int) error) error { + return l.DO.FindInBatches(result, batchSize, fc) +} + +func (l libraryProductDo) Attrs(attrs ...field.AssignExpr) ILibraryProductDo { + return l.withDO(l.DO.Attrs(attrs...)) +} + +func (l libraryProductDo) Assign(attrs ...field.AssignExpr) ILibraryProductDo { + return l.withDO(l.DO.Assign(attrs...)) +} + +func (l libraryProductDo) Joins(fields ...field.RelationField) ILibraryProductDo { + for _, _f := range fields { + l = *l.withDO(l.DO.Joins(_f)) + } + return &l +} + +func (l libraryProductDo) Preload(fields ...field.RelationField) ILibraryProductDo { + for _, _f := range fields { + l = *l.withDO(l.DO.Preload(_f)) + } + return &l +} + +func (l libraryProductDo) FirstOrInit() (*models.LibraryProduct, error) { + if result, err := l.DO.FirstOrInit(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProduct), nil + } +} + +func (l libraryProductDo) FirstOrCreate() (*models.LibraryProduct, error) { + if result, err := l.DO.FirstOrCreate(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProduct), nil + } +} + +func (l libraryProductDo) FindByPage(offset int, limit int) (result []*models.LibraryProduct, count int64, err error) { + result, err = l.Offset(offset).Limit(limit).Find() + if err != nil { + return + } + + if size := len(result); 0 < limit && 0 < size && size < limit { + count = int64(size + offset) + return + } + + count, err = l.Offset(-1).Limit(-1).Count() + return +} + +func (l libraryProductDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) { + count, err = l.Count() + if err != nil { + return + } + + err = l.Offset(offset).Limit(limit).Scan(result) + return +} + +func (l libraryProductDo) Scan(result interface{}) (err error) { + return l.DO.Scan(result) +} + +func (l 
libraryProductDo) Delete(models ...*models.LibraryProduct) (result gen.ResultInfo, err error) { + return l.DO.Delete(models) +} + +func (l *libraryProductDo) withDO(do gen.Dao) *libraryProductDo { + l.DO = *do.(*gen.DO) + return l +} diff --git a/src/mapper/library_product_image.gen.go b/src/mapper/library_product_image.gen.go new file mode 100644 index 0000000..b1c416f --- /dev/null +++ b/src/mapper/library_product_image.gen.go @@ -0,0 +1,412 @@ +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. + +package mapper + +import ( + "context" + "database/sql" + + "gorm.io/gorm" + "gorm.io/gorm/clause" + "gorm.io/gorm/schema" + + "gorm.io/gen" + "gorm.io/gen/field" + + "gorm.io/plugin/dbresolver" + + "fafa-crawler/src/models" +) + +func newLibraryProductImage(db *gorm.DB, opts ...gen.DOOption) libraryProductImage { + _libraryProductImage := libraryProductImage{} + + _libraryProductImage.libraryProductImageDo.UseDB(db, opts...) 
+ _libraryProductImage.libraryProductImageDo.UseModel(&models.LibraryProductImage{}) + + tableName := _libraryProductImage.libraryProductImageDo.TableName() + _libraryProductImage.ALL = field.NewAsterisk(tableName) + _libraryProductImage.ID = field.NewUint64(tableName, "id") + _libraryProductImage.ProductID = field.NewUint64(tableName, "product_id") + _libraryProductImage.ImageURL = field.NewString(tableName, "image_url") + _libraryProductImage.IsMain = field.NewInt32(tableName, "is_main") + _libraryProductImage.Seq = field.NewInt32(tableName, "seq") + _libraryProductImage.Status = field.NewInt32(tableName, "status") + _libraryProductImage.CreatedAt = field.NewTime(tableName, "created_at") + _libraryProductImage.UpdatedAt = field.NewTime(tableName, "updated_at") + + _libraryProductImage.fillFieldMap() + + return _libraryProductImage +} + +// libraryProductImage 商品库商品图片 +type libraryProductImage struct { + libraryProductImageDo + + ALL field.Asterisk + ID field.Uint64 // 图片ID + ProductID field.Uint64 // 商品ID + ImageURL field.String // 图片地址 + IsMain field.Int32 // 是否主图 1-主图 0-副图 + Seq field.Int32 // 排序值,越小越前面 + Status field.Int32 // 状态 1-有效 2-无效 + CreatedAt field.Time // 创建时间 + UpdatedAt field.Time // 更新时间 + + fieldMap map[string]field.Expr +} + +func (l libraryProductImage) Table(newTableName string) *libraryProductImage { + l.libraryProductImageDo.UseTable(newTableName) + return l.updateTableName(newTableName) +} + +func (l libraryProductImage) As(alias string) *libraryProductImage { + l.libraryProductImageDo.DO = *(l.libraryProductImageDo.As(alias).(*gen.DO)) + return l.updateTableName(alias) +} + +func (l *libraryProductImage) updateTableName(table string) *libraryProductImage { + l.ALL = field.NewAsterisk(table) + l.ID = field.NewUint64(table, "id") + l.ProductID = field.NewUint64(table, "product_id") + l.ImageURL = field.NewString(table, "image_url") + l.IsMain = field.NewInt32(table, "is_main") + l.Seq = field.NewInt32(table, "seq") + l.Status = 
field.NewInt32(table, "status") + l.CreatedAt = field.NewTime(table, "created_at") + l.UpdatedAt = field.NewTime(table, "updated_at") + + l.fillFieldMap() + + return l +} + +func (l *libraryProductImage) GetFieldByName(fieldName string) (field.OrderExpr, bool) { + _f, ok := l.fieldMap[fieldName] + if !ok || _f == nil { + return nil, false + } + _oe, ok := _f.(field.OrderExpr) + return _oe, ok +} + +func (l *libraryProductImage) fillFieldMap() { + l.fieldMap = make(map[string]field.Expr, 8) + l.fieldMap["id"] = l.ID + l.fieldMap["product_id"] = l.ProductID + l.fieldMap["image_url"] = l.ImageURL + l.fieldMap["is_main"] = l.IsMain + l.fieldMap["seq"] = l.Seq + l.fieldMap["status"] = l.Status + l.fieldMap["created_at"] = l.CreatedAt + l.fieldMap["updated_at"] = l.UpdatedAt +} + +func (l libraryProductImage) clone(db *gorm.DB) libraryProductImage { + l.libraryProductImageDo.ReplaceConnPool(db.Statement.ConnPool) + return l +} + +func (l libraryProductImage) replaceDB(db *gorm.DB) libraryProductImage { + l.libraryProductImageDo.ReplaceDB(db) + return l +} + +type libraryProductImageDo struct{ gen.DO } + +type ILibraryProductImageDo interface { + gen.SubQuery + Debug() ILibraryProductImageDo + WithContext(ctx context.Context) ILibraryProductImageDo + WithResult(fc func(tx gen.Dao)) gen.ResultInfo + ReplaceDB(db *gorm.DB) + ReadDB() ILibraryProductImageDo + WriteDB() ILibraryProductImageDo + As(alias string) gen.Dao + Session(config *gorm.Session) ILibraryProductImageDo + Columns(cols ...field.Expr) gen.Columns + Clauses(conds ...clause.Expression) ILibraryProductImageDo + Not(conds ...gen.Condition) ILibraryProductImageDo + Or(conds ...gen.Condition) ILibraryProductImageDo + Select(conds ...field.Expr) ILibraryProductImageDo + Where(conds ...gen.Condition) ILibraryProductImageDo + Order(conds ...field.Expr) ILibraryProductImageDo + Distinct(cols ...field.Expr) ILibraryProductImageDo + Omit(cols ...field.Expr) ILibraryProductImageDo + Join(table schema.Tabler, on 
...field.Expr) ILibraryProductImageDo + LeftJoin(table schema.Tabler, on ...field.Expr) ILibraryProductImageDo + RightJoin(table schema.Tabler, on ...field.Expr) ILibraryProductImageDo + Group(cols ...field.Expr) ILibraryProductImageDo + Having(conds ...gen.Condition) ILibraryProductImageDo + Limit(limit int) ILibraryProductImageDo + Offset(offset int) ILibraryProductImageDo + Count() (count int64, err error) + Scopes(funcs ...func(gen.Dao) gen.Dao) ILibraryProductImageDo + Unscoped() ILibraryProductImageDo + Create(values ...*models.LibraryProductImage) error + CreateInBatches(values []*models.LibraryProductImage, batchSize int) error + Save(values ...*models.LibraryProductImage) error + First() (*models.LibraryProductImage, error) + Take() (*models.LibraryProductImage, error) + Last() (*models.LibraryProductImage, error) + Find() ([]*models.LibraryProductImage, error) + FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*models.LibraryProductImage, err error) + FindInBatches(result *[]*models.LibraryProductImage, batchSize int, fc func(tx gen.Dao, batch int) error) error + Pluck(column field.Expr, dest interface{}) error + Delete(...*models.LibraryProductImage) (info gen.ResultInfo, err error) + Update(column field.Expr, value interface{}) (info gen.ResultInfo, err error) + UpdateSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error) + Updates(value interface{}) (info gen.ResultInfo, err error) + UpdateColumn(column field.Expr, value interface{}) (info gen.ResultInfo, err error) + UpdateColumnSimple(columns ...field.AssignExpr) (info gen.ResultInfo, err error) + UpdateColumns(value interface{}) (info gen.ResultInfo, err error) + UpdateFrom(q gen.SubQuery) gen.Dao + Attrs(attrs ...field.AssignExpr) ILibraryProductImageDo + Assign(attrs ...field.AssignExpr) ILibraryProductImageDo + Joins(fields ...field.RelationField) ILibraryProductImageDo + Preload(fields ...field.RelationField) ILibraryProductImageDo + FirstOrInit() 
(*models.LibraryProductImage, error) + FirstOrCreate() (*models.LibraryProductImage, error) + FindByPage(offset int, limit int) (result []*models.LibraryProductImage, count int64, err error) + ScanByPage(result interface{}, offset int, limit int) (count int64, err error) + Rows() (*sql.Rows, error) + Row() *sql.Row + Scan(result interface{}) (err error) + Returning(value interface{}, columns ...string) ILibraryProductImageDo + UnderlyingDB() *gorm.DB + schema.Tabler +} + +func (l libraryProductImageDo) Debug() ILibraryProductImageDo { + return l.withDO(l.DO.Debug()) +} + +func (l libraryProductImageDo) WithContext(ctx context.Context) ILibraryProductImageDo { + return l.withDO(l.DO.WithContext(ctx)) +} + +func (l libraryProductImageDo) ReadDB() ILibraryProductImageDo { + return l.Clauses(dbresolver.Read) +} + +func (l libraryProductImageDo) WriteDB() ILibraryProductImageDo { + return l.Clauses(dbresolver.Write) +} + +func (l libraryProductImageDo) Session(config *gorm.Session) ILibraryProductImageDo { + return l.withDO(l.DO.Session(config)) +} + +func (l libraryProductImageDo) Clauses(conds ...clause.Expression) ILibraryProductImageDo { + return l.withDO(l.DO.Clauses(conds...)) +} + +func (l libraryProductImageDo) Returning(value interface{}, columns ...string) ILibraryProductImageDo { + return l.withDO(l.DO.Returning(value, columns...)) +} + +func (l libraryProductImageDo) Not(conds ...gen.Condition) ILibraryProductImageDo { + return l.withDO(l.DO.Not(conds...)) +} + +func (l libraryProductImageDo) Or(conds ...gen.Condition) ILibraryProductImageDo { + return l.withDO(l.DO.Or(conds...)) +} + +func (l libraryProductImageDo) Select(conds ...field.Expr) ILibraryProductImageDo { + return l.withDO(l.DO.Select(conds...)) +} + +func (l libraryProductImageDo) Where(conds ...gen.Condition) ILibraryProductImageDo { + return l.withDO(l.DO.Where(conds...)) +} + +func (l libraryProductImageDo) Order(conds ...field.Expr) ILibraryProductImageDo { + return 
l.withDO(l.DO.Order(conds...)) +} + +func (l libraryProductImageDo) Distinct(cols ...field.Expr) ILibraryProductImageDo { + return l.withDO(l.DO.Distinct(cols...)) +} + +func (l libraryProductImageDo) Omit(cols ...field.Expr) ILibraryProductImageDo { + return l.withDO(l.DO.Omit(cols...)) +} + +func (l libraryProductImageDo) Join(table schema.Tabler, on ...field.Expr) ILibraryProductImageDo { + return l.withDO(l.DO.Join(table, on...)) +} + +func (l libraryProductImageDo) LeftJoin(table schema.Tabler, on ...field.Expr) ILibraryProductImageDo { + return l.withDO(l.DO.LeftJoin(table, on...)) +} + +func (l libraryProductImageDo) RightJoin(table schema.Tabler, on ...field.Expr) ILibraryProductImageDo { + return l.withDO(l.DO.RightJoin(table, on...)) +} + +func (l libraryProductImageDo) Group(cols ...field.Expr) ILibraryProductImageDo { + return l.withDO(l.DO.Group(cols...)) +} + +func (l libraryProductImageDo) Having(conds ...gen.Condition) ILibraryProductImageDo { + return l.withDO(l.DO.Having(conds...)) +} + +func (l libraryProductImageDo) Limit(limit int) ILibraryProductImageDo { + return l.withDO(l.DO.Limit(limit)) +} + +func (l libraryProductImageDo) Offset(offset int) ILibraryProductImageDo { + return l.withDO(l.DO.Offset(offset)) +} + +func (l libraryProductImageDo) Scopes(funcs ...func(gen.Dao) gen.Dao) ILibraryProductImageDo { + return l.withDO(l.DO.Scopes(funcs...)) +} + +func (l libraryProductImageDo) Unscoped() ILibraryProductImageDo { + return l.withDO(l.DO.Unscoped()) +} + +func (l libraryProductImageDo) Create(values ...*models.LibraryProductImage) error { + if len(values) == 0 { + return nil + } + return l.DO.Create(values) +} + +func (l libraryProductImageDo) CreateInBatches(values []*models.LibraryProductImage, batchSize int) error { + return l.DO.CreateInBatches(values, batchSize) +} + +// Save : !!! 
underlying implementation is different with GORM +// The method is equivalent to executing the statement: db.Clauses(clause.OnConflict{UpdateAll: true}).Create(values) +func (l libraryProductImageDo) Save(values ...*models.LibraryProductImage) error { + if len(values) == 0 { + return nil + } + return l.DO.Save(values) +} + +func (l libraryProductImageDo) First() (*models.LibraryProductImage, error) { + if result, err := l.DO.First(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProductImage), nil + } +} + +func (l libraryProductImageDo) Take() (*models.LibraryProductImage, error) { + if result, err := l.DO.Take(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProductImage), nil + } +} + +func (l libraryProductImageDo) Last() (*models.LibraryProductImage, error) { + if result, err := l.DO.Last(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProductImage), nil + } +} + +func (l libraryProductImageDo) Find() ([]*models.LibraryProductImage, error) { + result, err := l.DO.Find() + return result.([]*models.LibraryProductImage), err +} + +func (l libraryProductImageDo) FindInBatch(batchSize int, fc func(tx gen.Dao, batch int) error) (results []*models.LibraryProductImage, err error) { + buf := make([]*models.LibraryProductImage, 0, batchSize) + err = l.DO.FindInBatches(&buf, batchSize, func(tx gen.Dao, batch int) error { + defer func() { results = append(results, buf...) 
}() + return fc(tx, batch) + }) + return results, err +} + +func (l libraryProductImageDo) FindInBatches(result *[]*models.LibraryProductImage, batchSize int, fc func(tx gen.Dao, batch int) error) error { + return l.DO.FindInBatches(result, batchSize, fc) +} + +func (l libraryProductImageDo) Attrs(attrs ...field.AssignExpr) ILibraryProductImageDo { + return l.withDO(l.DO.Attrs(attrs...)) +} + +func (l libraryProductImageDo) Assign(attrs ...field.AssignExpr) ILibraryProductImageDo { + return l.withDO(l.DO.Assign(attrs...)) +} + +func (l libraryProductImageDo) Joins(fields ...field.RelationField) ILibraryProductImageDo { + for _, _f := range fields { + l = *l.withDO(l.DO.Joins(_f)) + } + return &l +} + +func (l libraryProductImageDo) Preload(fields ...field.RelationField) ILibraryProductImageDo { + for _, _f := range fields { + l = *l.withDO(l.DO.Preload(_f)) + } + return &l +} + +func (l libraryProductImageDo) FirstOrInit() (*models.LibraryProductImage, error) { + if result, err := l.DO.FirstOrInit(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProductImage), nil + } +} + +func (l libraryProductImageDo) FirstOrCreate() (*models.LibraryProductImage, error) { + if result, err := l.DO.FirstOrCreate(); err != nil { + return nil, err + } else { + return result.(*models.LibraryProductImage), nil + } +} + +func (l libraryProductImageDo) FindByPage(offset int, limit int) (result []*models.LibraryProductImage, count int64, err error) { + result, err = l.Offset(offset).Limit(limit).Find() + if err != nil { + return + } + + if size := len(result); 0 < limit && 0 < size && size < limit { + count = int64(size + offset) + return + } + + count, err = l.Offset(-1).Limit(-1).Count() + return +} + +func (l libraryProductImageDo) ScanByPage(result interface{}, offset int, limit int) (count int64, err error) { + count, err = l.Count() + if err != nil { + return + } + + err = l.Offset(offset).Limit(limit).Scan(result) + return +} + +func (l 
libraryProductImageDo) Scan(result interface{}) (err error) { + return l.DO.Scan(result) +} + +func (l libraryProductImageDo) Delete(models ...*models.LibraryProductImage) (result gen.ResultInfo, err error) { + return l.DO.Delete(models) +} + +func (l *libraryProductImageDo) withDO(do gen.Dao) *libraryProductImageDo { + l.DO = *do.(*gen.DO) + return l +} diff --git a/src/models/library_product.gen.go b/src/models/library_product.gen.go new file mode 100644 index 0000000..a73074b --- /dev/null +++ b/src/models/library_product.gen.go @@ -0,0 +1,43 @@ +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. + +package models + +import ( + "time" +) + +const TableNameLibraryProduct = "library_product" + +// LibraryProduct 商品库商品表 +type LibraryProduct struct { + ID uint64 `gorm:"column:id;type:bigint unsigned;primaryKey;autoIncrement:true;comment:商品ID" json:"id"` // 商品ID + Sname string `gorm:"column:sname;type:varchar(100);index:idx_product_sname,priority:1;comment:商品简称,作为匹配关键字" json:"sname"` // 商品简称,作为匹配关键字 + Name string `gorm:"column:name;type:varchar(200);not null;index:idx_library_product_name,priority:1;comment:商品名" json:"name"` // 商品名 + Title string `gorm:"column:title;type:varchar(255);comment:商品标题" json:"title"` // 商品标题 + Barcode string `gorm:"column:barcode;type:varchar(64);index:idx_library_product_barcode,priority:1;comment:条形码/Barcode" json:"barcode"` // 条形码/Barcode + Category1St string `gorm:"column:category_1st;type:varchar(64);comment:第一级分类" json:"category_1st"` // 第一级分类 + Category2Nd string `gorm:"column:category_2nd;type:varchar(64);comment:第二级分类" json:"category_2nd"` // 第二级分类 + Category string `gorm:"column:category;type:varchar(64);comment:第3级分类" json:"category"` // 第3级分类 + Price float64 `gorm:"column:price;type:decimal(18,2);default:0.00;comment:商品原价" json:"price"` // 商品原价 + Keywords string `gorm:"column:keywords;type:varchar(255);not null;comment:商品关键字" 
json:"keywords"` // 商品关键字 + Thumb string `gorm:"column:thumb;type:varchar(512);comment:封面图主图" json:"thumb"` // 封面图主图 + Brand string `gorm:"column:brand;type:varchar(45);comment:品牌" json:"brand"` // 品牌 + Spec string `gorm:"column:spec;type:varchar(255);comment:商品规格,多个使用英文分号分割" json:"spec"` // 商品规格,多个使用英文分号分割 + MadeIn string `gorm:"column:made_in;type:varchar(80);comment:产地" json:"made_in"` // 产地 + Weight string `gorm:"column:weight;type:varchar(25);comment:重量值" json:"weight"` // 重量值 + WeightUnit string `gorm:"column:weight_unit;type:varchar(25);comment:重量单位" json:"weight_unit"` // 重量单位 + SellingPoint string `gorm:"column:selling_point;type:varchar(120);comment:商品卖点" json:"selling_point"` // 商品卖点 + Intro string `gorm:"column:intro;type:longtext;comment:商品介绍" json:"intro"` // 商品介绍 + Seq int32 `gorm:"column:seq;type:int;index:idx_library_product_seq,priority:1;default:100;comment:排序值,越小越前面" json:"seq"` // 排序值,越小越前面 + Source string `gorm:"column:source;type:varchar(45);index:idx_library_product_source,priority:1;comment:来源ID" json:"source"` // 来源ID + Status int32 `gorm:"column:status;type:int;index:idx_library_product_status,priority:1;default:1;comment:状态 1-有效 2-无效" json:"status"` // 状态 1-有效 2-无效 + CreatedAt time.Time `gorm:"column:created_at;type:datetime;default:CURRENT_TIMESTAMP;comment:创建时间" json:"created_at"` // 创建时间 + UpdatedAt time.Time `gorm:"column:updated_at;type:datetime;default:CURRENT_TIMESTAMP;comment:更新时间" json:"updated_at"` // 更新时间 +} + +// TableName LibraryProduct's table name +func (*LibraryProduct) TableName() string { + return TableNameLibraryProduct +} diff --git a/src/models/library_product_image.gen.go b/src/models/library_product_image.gen.go new file mode 100644 index 0000000..ec334b4 --- /dev/null +++ b/src/models/library_product_image.gen.go @@ -0,0 +1,28 @@ +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. +// Code generated by gorm.io/gen. DO NOT EDIT. 
+ +package models + +import ( + "time" +) + +const TableNameLibraryProductImage = "library_product_image" + +// LibraryProductImage 商品库商品图片 +type LibraryProductImage struct { + ID uint64 `gorm:"column:id;type:bigint unsigned;primaryKey;autoIncrement:true;comment:图片ID" json:"id"` // 图片ID + ProductID uint64 `gorm:"column:product_id;type:bigint unsigned;not null;index:idx_library_product_image_pid_img,priority:1;comment:商品ID" json:"product_id"` // 商品ID + ImageURL string `gorm:"column:image_url;type:varchar(512);not null;index:idx_library_product_image_pid_img,priority:2;comment:图片地址" json:"image_url"` // 图片地址 + IsMain int32 `gorm:"column:is_main;type:int;index:idx_library_product_image_is_main,priority:1;comment:是否主图 1-主图 0-副图" json:"is_main"` // 是否主图 1-主图 0-副图 + Seq int32 `gorm:"column:seq;type:int;default:100;comment:排序值,越小越前面" json:"seq"` // 排序值,越小越前面 + Status int32 `gorm:"column:status;type:int;index:idx_library_product_image_status,priority:1;default:1;comment:状态 1-有效 2-无效" json:"status"` // 状态 1-有效 2-无效 + CreatedAt time.Time `gorm:"column:created_at;type:datetime;default:CURRENT_TIMESTAMP;comment:创建时间" json:"created_at"` // 创建时间 + UpdatedAt time.Time `gorm:"column:updated_at;type:datetime;default:CURRENT_TIMESTAMP;comment:更新时间" json:"updated_at"` // 更新时间 +} + +// TableName LibraryProductImage's table name +func (*LibraryProductImage) TableName() string { + return TableNameLibraryProductImage +} diff --git a/src/services/init.go b/src/services/init.go new file mode 100644 index 0000000..9d12d8b --- /dev/null +++ b/src/services/init.go @@ -0,0 +1,28 @@ +package services + +import ( + "sync" + + "gorm.io/gorm" + + "fafa-crawler/src/dbutil" + "fafa-crawler/src/mapper" +) + +var ( + once sync.Once + dbGorm *gorm.DB + ormQ *mapper.Query +) + +func init() { + if dbGorm == nil { + dbutil.InitMySQLDB() + dbGorm = dbutil.MySQLDB + } + + // 初始化 gorm 的 mapper + mapper.SetDefault(dbGorm) + + ormQ = mapper.Use(dbGorm) +} diff --git a/src/services/product_image_service.go 
b/src/services/product_image_service.go new file mode 100644 index 0000000..9c9715e --- /dev/null +++ b/src/services/product_image_service.go @@ -0,0 +1,15 @@ +package services + +var ( + productImageService *ProductImageService +) + +type ProductImageService struct { +} + +func NewProductImageService() *ProductImageService { + once.Do(func() { + productImageService = &ProductImageService{} + }) + return productImageService +} diff --git a/src/services/product_service.go b/src/services/product_service.go new file mode 100644 index 0000000..43b2f86 --- /dev/null +++ b/src/services/product_service.go @@ -0,0 +1,230 @@ +package services + +import ( + "context" + "fmt" + "log" + + "fafa-crawler/src/beans" + "fafa-crawler/src/mapper" + "fafa-crawler/src/models" + "fafa-crawler/src/util" + + "gorm.io/gorm" +) + +var ( + productService *ProductService + filePath = "/media/images/goods_library/" +) + +type ProductService struct { + db *gorm.DB // Add database connection +} + +func NewProductService() *ProductService { // Pass database connection + once.Do(func() { + productService = &ProductService{db: dbGorm} // Initialize with database connection + }) + return productService +} + +// saveImageAndProductImage saves the image, creates the ProductImage record, and updates the product thumbnail. 
+func (s *ProductService) saveImageAndProductImage(ctx context.Context, tx *gorm.DB, productID uint64, pi *models.LibraryProductImage, p *models.LibraryProduct) error { + fileName, err := util.SaveImageFromURL(pi.ImageURL, fmt.Sprintf("/%s/%d/", filePath, productID)) + if err != nil { + log.Printf("保存图片失败: %v, URL: %s", err, pi.ImageURL) + return err + } + + pi.ImageURL = fileName // Update ImageURL to the local path + pi.ProductID = productID + + if err := mapper.LibraryProductImage.WithContext(ctx).Create(pi); err != nil { + log.Printf("创建 ProductImage 失败: %v, URL: %s", err, pi.ImageURL) + return err + } + + if pi.IsMain == 1 { + // Update the main image in the product record + p.Thumb = pi.ImageURL // Assuming Thumb is the main image field + if _, err := mapper.LibraryProduct.WithContext(ctx).Where(mapper.LibraryProduct.ID.Eq(productID)).Update(mapper.LibraryProduct.Thumb, pi.ImageURL); err != nil { + log.Printf("更新商品缩略图失败: %v, ProductID: %d", err, productID) + return err + } + } + return nil +} + +// SaveProduct saves a single product. 
+func (s *ProductService) SaveProduct(record *beans.ProductBean) bool { + if record == nil { + log.Println("商品记录为空") + return false + } + + p, pis, err := record.ConvProductModel() + if err != nil { + log.Printf("转换商品模型失败: %v", err) + return false + } + + if s.IsExits(p) { + log.Printf("商品已存在: %s", p.Name) + return false + } + + ctx := context.Background() // Create a context + tx := s.db.Begin() // Start a transaction + defer func() { + if r := recover(); r != nil { + tx.Rollback() + log.Printf("事务 panic,已回滚: %v", r) + panic(r) + } + if err := tx.Commit().Error; err != nil { + tx.Rollback() + log.Printf("提交事务失败: %v", err) + } + }() + + if err := mapper.LibraryProduct.WithContext(ctx).Create(p); err != nil { + log.Printf("保存商品失败: %v", err) + tx.Rollback() + return false + } + + for _, pi := range pis { + if s.IsExitsProductImage(p.ID, pi.ImageURL) { + continue + } + if err := s.saveImageAndProductImage(ctx, tx, p.ID, pi, p); err != nil { + // tx.Rollback() + // return false + continue + } + } + + return true +} + +// SaveMeiTuanProducts 单商品独立事务批量保存美团商品及关联图片 +// 参数: record - 待保存的美团商品数据列表(包含商品信息及图片列表) +// 返回: 成功保存的商品记录数(仅统计事务提交成功的记录) +func (s *ProductService) SaveMeiTuanProducts(record []*beans.ProductMeiTuanBean) int64 { + var successCount int64 + + for _, bean := range record { + // 1. 检查商品是否已存在,存在则跳过 + if s.IsExitsMeiTuan(bean.Product) { + continue + } + + // 2. 单商品独立事务:保存商品及关联图片 + // 注意:事务闭包返回error时会自动回滚,返回nil则自动提交 + err := s.db.Transaction(func(tx *gorm.DB) error { + // 2.1 保存商品主记录 + if err := tx.Create(bean.Product).Error; err != nil { + return fmt.Errorf("保存商品失败: %w", err) + } + + // 2.2 绑定商品ID到图片记录(批量操作替代循环赋值) + for i := range bean.ImageList { + bean.ImageList[i].ProductID = bean.Product.ID + } + + // 2.3 批量保存关联图片(空切片时自动跳过) + if err := tx.CreateInBatches(bean.ImageList, 100).Error; err != nil { + return fmt.Errorf("保存商品图片失败: %w", err) + } + + return nil // 事务提交 + }) + + // 3. 
仅当事务成功时计数 + if err == nil { + successCount++ + } + } + + return successCount +} + +// IsExits checks if a product already exists based on certain criteria. +func (s *ProductService) IsExits(record *models.LibraryProduct) bool { + if record == nil { + return false + } + + m := mapper.LibraryProduct + q := m.Select(m.ID) + + if record.Source != "" { + q = q.Where(m.Source.Eq(record.Source)) + } + + if record.Barcode != "" { + q = q.Where(m.Barcode.Eq(record.Barcode)) + } + + if record.Name != "" { + q = q.Where(m.Name.Eq(record.Name)) + } + + cnt, err := q.Count() + if err != nil { + log.Printf("查询商品数量失败: %v", err) + return false + } + + return cnt > 0 +} + +// IsExitsMeiTuan 检查美团商品是否已存在 +// 参数: record - 待检查的商品记录 +// 返回: 存在返回true,否则返回false +func (s *ProductService) IsExitsMeiTuan(record *models.LibraryProduct) bool { + // 空记录直接返回不存在 + if record == nil { + return false + } + + m := mapper.LibraryProduct + q := m.Select(m.ID) + + // 优先使用条形码查询,无条形码时使用标题查询 + if record.Barcode != "" { + q = q.Where(m.Barcode.Eq(record.Barcode)) + } else if record.Title != "" { + q = q.Where(m.Title.Eq(record.Title)) + } else { + // 无有效查询条件 + return false + } + + // 执行数量统计查询 + cnt, err := q.Count() + if err != nil { + log.Printf("查询商品数量失败,错误: %v", err) + return false + } + + return cnt > 0 +} + +func (s *ProductService) IsExitsProductImage(productID uint64, imageURL string) bool { + if productID <= 0 || imageURL == "" { + return false + } + + m := mapper.LibraryProductImage + q := m.Select(m.ID).Where(m.ProductID.Eq(productID), m.ImageURL.Eq(imageURL)) + + cnt, err := q.Count() + if err != nil { + log.Printf("查询商品数量失败: %v", err) + return false + } + + return cnt > 0 +} diff --git a/src/services/sx_goods_service.go b/src/services/sx_goods_service.go new file mode 100644 index 0000000..9e23e0d --- /dev/null +++ b/src/services/sx_goods_service.go @@ -0,0 +1,431 @@ +// 思迅数据同步 +package services + +import ( + "fmt" + "log" + "os" + "path/filepath" + "time" + + "github.com/goccy/go-json" // 
使用性能更好的JSON库 + "github.com/spf13/viper" + + "fafa-crawler/src/beans" + "fafa-crawler/src/mapper" + "fafa-crawler/src/models" + "fafa-crawler/src/util" + + "gorm.io/gorm" +) + +const ( + BatchSize = 400 +) + +var ( + sxGoodsService *SxGoodsService +) + +type SxGoodsService struct { + db *gorm.DB // Add database connection +} + +func NewSxGoodsService() *SxGoodsService { // Pass database connection + once.Do(func() { + sxGoodsService = &SxGoodsService{db: dbGorm} // Initialize with database connection + }) + return sxGoodsService +} + +// ProcessSiXunGoodsData 高效处理思迅商品数据的综合方法 +// 执行顺序: 1. JSON数据转换 2. 图片文件过滤 3. 批量保存到数据库 4. 上传图片到COS +func (s *SxGoodsService) ProcessSiXunGoodsData(path string) (srcDataCnt, filteredDataCnt, savedDataCnt, filteredImgCnt int64) { + // 步骤1: JSON数据转换 + srcDataCnt, targetDataCnt, sxBeanList := s.JsonDataConvBeans(path) + if targetDataCnt == 0 || len(sxBeanList) == 0 { + log.Printf("JSON数据转换失败或无有效数据") + return srcDataCnt, 0, 0, 0 + } + log.Printf("JSON数据转换完成: 源数据%d条,转换成功%d条", srcDataCnt, targetDataCnt) + + // 步骤2: 图片文件过滤 + filteredRecords, filteredDataCnt, filteredImgCnt := s.filterSiXunGoodsBeans(path, sxBeanList) + if filteredDataCnt == 0 { + log.Printf("图片文件过滤后无有效数据") + return srcDataCnt, filteredDataCnt, 0, filteredImgCnt + } + log.Printf("图片文件过滤完成: 有效记录%d条,有效图片%d张", filteredDataCnt, filteredImgCnt) + + // 步骤3: 批量保存到数据库 + savedDataCnt = s.BatchSaveSiXunProducts(filteredRecords) + log.Printf("批量保存完成: 成功保存%d条记录", savedDataCnt) + + // 步骤4: 上传图片到COS + if err := s.uploadImagesToCOS(path); err != nil { + log.Printf("上传图片到COS失败: %v", err) + } + + return srcDataCnt, filteredDataCnt, savedDataCnt, filteredImgCnt +} + +// uploadImagesToCOS 上传图片到COS +func (s *SxGoodsService) uploadImagesToCOS(path string) error { + // 获取当前日期文件夹名称 + currentTime := time.Now() + dateFolderName := currentTime.Format("20060102") + localImageDir := filepath.Join(path, dateFolderName) + + // 从配置文件读取COS配置 + if err := s.loadCOSConfig(); err != nil { + return 
fmt.Errorf("加载COS配置失败: %w", err) + } + + // 获取COS工具类实例 + cosConfig := util.COSConfig{ + BucketURL: viper.GetString("cossdk.bucket_url"), + SecretID: viper.GetString("cossdk.secret_id"), + SecretKey: viper.GetString("cossdk.secret_key"), + BasePath: viper.GetString("cossdk.base_path"), + } + + cosUtil, err := util.GetCOSUtil(cosConfig) + if err != nil { + return fmt.Errorf("获取COS工具类实例失败: %w", err) + } + + // 并发上传图片到COS + if err := cosUtil.ConcurrentUploadDirectory(localImageDir, dateFolderName, 10); err != nil { + return fmt.Errorf("上传图片到COS失败: %w", err) + } + + log.Printf("图片上传到COS完成: 上传目录 %s", localImageDir) + return nil +} + +// loadCOSConfig 从配置文件加载COS配置 +func (s *SxGoodsService) loadCOSConfig() error { + viper.SetConfigFile("./config/config.toml") + viper.SetConfigType("toml") + + if err := viper.ReadInConfig(); err != nil { + return fmt.Errorf("读取配置文件失败: %w", err) + } + + return nil +} + +// JsonDataConvBeans 将JSON数据转换为SiXunGoodsBean对象数组 +func (s *SxGoodsService) JsonDataConvBeans(path string) (srcDataCnt, targetDataCnt int64, sxBeanList []*beans.SiXunGoodsBean) { + // 构建完整的文件路径 + jsonFilePath := filepath.Join(path, "data", "product_data.json") + + // 读取JSON文件 + data, err := os.ReadFile(jsonFilePath) + if err != nil { + log.Printf("读取JSON文件失败: %v", err) + return 0, 0, nil + } + + // 定义临时结构体来解析JSON数据 + type tempImage struct { + ProductID string `json:"product_id"` + Barcode string `json:"barcode"` + ImageURL string `json:"image_url"` + Seq int32 `json:"seq"` + IsMain int32 `json:"is_main"` + } + + type tempProduct struct { + ProductID string `json:"product_id"` + Name string `json:"name"` + Title string `json:"title"` + Barcode string `json:"barcode"` + Category1st string `json:"category_1st"` + Category2nd string `json:"category_2nd"` + Price float64 `json:"price"` + Keywords string `json:"keywords"` + Thumb string `json:"thumb"` + Brand string `json:"brand"` + Spec string `json:"spec"` + Weight float64 `json:"weight"` + WeightUnit string `json:"weight_unit"` 
+ Intro string `json:"intro"` + ImagesList []tempImage `json:"images_list"` + } + + // 解析JSON数据(使用高性能JSON库) + var tempProducts []tempProduct + if err := json.Unmarshal(data, &tempProducts); err != nil { + log.Printf("解析JSON数据失败: %v", err) + return 0, 0, nil + } + + // 设置源数据计数 + srcDataCnt = int64(len(tempProducts)) + + // 转换为SiXunGoodsBean对象数组 + var siXunGoodsBeans []*beans.SiXunGoodsBean + for _, tempProd := range tempProducts { + // 创建LibraryProduct模型 + product := &models.LibraryProduct{ + Name: tempProd.Name, + Sname: tempProd.Name, + Title: tempProd.Title, + Barcode: tempProd.Barcode, + Category1St: tempProd.Category1st, + Category2Nd: tempProd.Category2nd, + Price: tempProd.Price, + Keywords: tempProd.Keywords, + Thumb: tempProd.Thumb, + Brand: tempProd.Brand, + Spec: tempProd.Spec, + Weight: fmt.Sprintf("%.2f", tempProd.Weight), + WeightUnit: tempProd.WeightUnit, + Intro: tempProd.Intro, + Source: fmt.Sprintf("sixun_%s", tempProd.ProductID), + } + + // 如果Category2Nd为空,使用Category1st的值 + if product.Category2Nd == "" { + product.Category2Nd = product.Category1St + } + + // 如果Intro为空,使用Title的值 + if product.Intro == "" { + product.Intro = product.Title + } + + // 创建LibraryProductImage模型列表 + var productImages []*models.LibraryProductImage + for _, tempImg := range tempProd.ImagesList { + image := &models.LibraryProductImage{ + ImageURL: tempImg.ImageURL, + Seq: tempImg.Seq, + IsMain: tempImg.IsMain, + } + productImages = append(productImages, image) + } + + // 创建SiXunGoodsBean + bean := &beans.SiXunGoodsBean{ + Product: product, + ImageList: productImages, + } + + siXunGoodsBeans = append(siXunGoodsBeans, bean) + } + + // 设置转换成功记录数 + targetDataCnt = int64(len(siXunGoodsBeans)) + + return srcDataCnt, targetDataCnt, siXunGoodsBeans +} + +// filterSiXunGoodsBeans 过滤思迅商品记录,检查图片文件是否存在 +func (s *SxGoodsService) filterSiXunGoodsBeans(path string, records []*beans.SiXunGoodsBean) (filteredRecords []*beans.SiXunGoodsBean, targetDataCnt, targetImgCnt int64) { + // 创建日期格式的文件夹 
+ currentTime := time.Now() + dateFolderName := currentTime.Format("20060102") + dateFolderPath := filepath.Join(path, dateFolderName) + + // 确保日期文件夹存在 + if err := os.MkdirAll(dateFolderPath, 0755); err != nil { + log.Printf("创建日期文件夹失败: %v", err) + return records, int64(len(records)), 0 + } + + // 遍历所有记录 + for _, record := range records { + // 创建新的图片列表,只包含存在的图片 + var validImages []*models.LibraryProductImage + validImageCount := int64(0) + + // 检查每个图片是否存在 + for _, image := range record.ImageList { + imagePath := filepath.Join(path, "images", image.ImageURL) + + // 检查文件是否存在 + if _, err := os.Stat(imagePath); err == nil { + // 文件存在,添加到有效图片列表 + validImages = append(validImages, image) + validImageCount++ + + // 拷贝图片到日期文件夹(使用高性能文件拷贝工具) + targetImagePath := filepath.Join(dateFolderPath, image.ImageURL) + if err := util.CopyFile(imagePath, targetImagePath); err != nil { + log.Printf("拷贝图片文件失败: %v", err) + } + + // 更新图片路径为统一的实体示例路径 + image.ImageURL = fmt.Sprintf("/media/images/goods_library/%s/%s", dateFolderName, image.ImageURL) + } + } + + // 更新缩略图路径 + record.Product.Thumb = fmt.Sprintf("/media/images/goods_library/%s/%s", dateFolderName, record.Product.Thumb) + + // 如果有有效图片,则保留该记录 + if len(validImages) > 0 { + // 更新记录的图片列表 + record.ImageList = validImages + filteredRecords = append(filteredRecords, record) + targetImgCnt += validImageCount + } + } + + targetDataCnt = int64(len(filteredRecords)) + return filteredRecords, targetDataCnt, targetImgCnt +} + +// BatchSaveSiXunProducts 高效批量添加思迅商品和商品图片记录 +func (s *SxGoodsService) BatchSaveSiXunProducts(records []*beans.SiXunGoodsBean) int64 { + if len(records) == 0 { + return 0 + } + + var successCount int64 + totalRecords := len(records) + + // 分批处理记录 + for i := 0; i < totalRecords; i += BatchSize { + // 计算当前批次的结束索引 + end := i + BatchSize + if end > totalRecords { + end = totalRecords + } + + // 获取当前批次的数据 + batch := records[i:end] + + // 处理当前批次 + count := s.saveSiXunProductBatch(batch) + successCount += count + + 
log.Printf("思迅商品批量保存进度: %d/%d (当前批次保存成功: %d)\n", end, totalRecords, count) + } + + return successCount +} + +// saveSiXunProductBatch 保存思迅商品批次数据 +func (s *SxGoodsService) saveSiXunProductBatch(batch []*beans.SiXunGoodsBean) int64 { + var successCount int64 + + // 使用事务确保数据一致性 + err := s.db.Transaction(func(tx *gorm.DB) error { + // 创建临时存储商品和图片的切片 + var products []*models.LibraryProduct + var allProductImages []*models.LibraryProductImage + + // 遍历批次中的每个商品 + for _, bean := range batch { + // 检查商品是否已存在 + if s.IsExitsSiXun(bean.Product) { + continue // 如果商品已存在,跳过 + } + + // 添加商品到待插入列表 + products = append(products, bean.Product) + + // 处理商品图片 + for _, image := range bean.ImageList { + // 设置商品ID(在创建商品后会更新) + image.ProductID = bean.Product.ID + allProductImages = append(allProductImages, image) + } + } + + // 如果没有需要保存的商品,直接返回 + if len(products) == 0 { + return nil + } + + // 批量插入商品(使用批次大小) + if err := tx.CreateInBatches(products, BatchSize).Error; err != nil { + return fmt.Errorf("批量保存商品失败: %w", err) + } + + // 更新图片中的商品ID并批量插入图片 + if len(allProductImages) > 0 { + // 根据商品的条形码或名称匹配更新图片的商品ID + for i, product := range products { + for _, image := range allProductImages { + // 这里假设图片已经正确关联到商品 + // 在实际应用中,可能需要更复杂的匹配逻辑 + if image.ProductID == 0 { + image.ProductID = product.ID + } + } + // 更新原始数据中的商品ID(如果需要) + if i < len(batch) { + batch[i].Product.ID = product.ID + } + } + + // 批量插入图片(使用批次大小) + if err := tx.CreateInBatches(allProductImages, BatchSize).Error; err != nil { + return fmt.Errorf("批量保存商品图片失败: %w", err) + } + } + + successCount = int64(len(products)) + return nil + }) + + if err != nil { + log.Printf("思迅商品批量保存失败: %v", err) + return 0 + } + + return successCount +} + +// IsExitsSiXun 检查思迅商品是否已存在 +func (s *SxGoodsService) IsExitsSiXun(record *models.LibraryProduct) bool { + // 空记录直接返回不存在 + if record == nil { + return false + } + + m := mapper.LibraryProduct + q := m.Select(m.ID) + + // 优先使用条形码查询,无条形码时使用商品名称查询 + if record.Barcode != "" { + q = 
q.Where(m.Barcode.Eq(record.Barcode)) + } else if record.Name != "" { + q = q.Where(m.Name.Eq(record.Name)) + } else { + // 无有效查询条件 + return false + } + + // 执行数量统计查询 + cnt, err := q.Count() + if err != nil { + log.Printf("查询思迅商品数量失败,错误: %v", err) + return false + } + + return cnt > 0 +} + +// IsExitsSiXunByBarcode 根据条形码检查思迅商品是否已存在 +func (s *SxGoodsService) IsExitsSiXunByBarcode(barcode string) bool { + if barcode == "" { + return false + } + + m := mapper.LibraryProduct + q := m.Select(m.ID).Where(m.Barcode.Eq(barcode)) + + // 执行数量统计查询 + cnt, err := q.Count() + if err != nil { + log.Printf("根据条形码查询思迅商品数量失败,错误: %v", err) + return false + } + + return cnt > 0 +} diff --git a/src/services/sx_service.go b/src/services/sx_service.go new file mode 100644 index 0000000..5e568ea --- /dev/null +++ b/src/services/sx_service.go @@ -0,0 +1 @@ +package services diff --git a/src/util/cos_util.go b/src/util/cos_util.go new file mode 100644 index 0000000..90f7221 --- /dev/null +++ b/src/util/cos_util.go @@ -0,0 +1,224 @@ +package util + +import ( + "context" + "fmt" + "net/http" + "net/url" + "os" + "path/filepath" + "sync" + + "github.com/tencentyun/cos-go-sdk-v5" +) + +// COSConfig COS配置信息 +type COSConfig struct { + BucketURL string `mapstructure:"bucket_url"` + SecretID string `mapstructure:"secret_id"` + SecretKey string `mapstructure:"secret_key"` + BasePath string `mapstructure:"base_path"` +} + +// COSUtil COS工具类 +type COSUtil struct { + client *cos.Client + basePath string +} + +var ( + cosUtilInstance *COSUtil + once sync.Once +) + +// GetCOSUtil 获取COS工具类单例实例 +func GetCOSUtil(config COSConfig) (*COSUtil, error) { + var err error + once.Do(func() { + cosUtilInstance, err = newCOSUtil(config) + }) + return cosUtilInstance, err +} + +// newCOSUtil 创建新的COS工具类实例 +func newCOSUtil(config COSConfig) (*COSUtil, error) { + // 解析COS Bucket URL + bucketURL, err := url.Parse(config.BucketURL) + if err != nil { + return nil, fmt.Errorf("解析COS Bucket URL失败: %v", err) + } + + // 
创建COS客户端 + client := cos.NewClient(&cos.BaseURL{BucketURL: bucketURL}, &http.Client{ + Transport: &cos.AuthorizationTransport{ + SecretID: config.SecretID, + SecretKey: config.SecretKey, + }, + }) + + return &COSUtil{ + client: client, + basePath: config.BasePath, + }, nil +} + +// UploadFile 上传单个文件到COS +func (c *COSUtil) UploadFile(localFilePath, cosPath string) error { + // 如果cosPath为空,则使用文件名 + if cosPath == "" { + cosPath = filepath.Base(localFilePath) + } + + // 添加基础路径前缀 + if c.basePath != "" { + cosPath = filepath.Join(c.basePath, cosPath) + } + + // 规范化路径分隔符 + cosPath = filepath.ToSlash(cosPath) + + // 打开本地文件 + file, err := os.Open(localFilePath) + if err != nil { + return fmt.Errorf("打开本地文件失败: %v", err) + } + defer file.Close() + + // 上传文件到COS + _, err = c.client.Object.Put(context.Background(), cosPath, file, nil) + if err != nil { + return fmt.Errorf("上传文件到COS失败: %v", err) + } + + return nil +} + +// BatchUpload 批量上传文件到COS(并发上传) +func (c *COSUtil) BatchUpload(fileMap map[string]string) error { + // 使用goroutine并发上传文件 + var wg sync.WaitGroup + errChan := make(chan error, len(fileMap)) + + // 遍历文件映射进行上传 + for localPath, cosPath := range fileMap { + wg.Add(1) + go func(localPath, cosPath string) { + defer wg.Done() + if err := c.UploadFile(localPath, cosPath); err != nil { + errChan <- fmt.Errorf("上传文件 %s 失败: %v", localPath, err) + } + }(localPath, cosPath) + } + + // 等待所有上传完成 + wg.Wait() + close(errChan) + + // 检查是否有错误 + for err := range errChan { + if err != nil { + return err + } + } + + return nil +} + +// UploadDirectory 上传整个目录到COS(并发上传) +func (c *COSUtil) UploadDirectory(localDir, cosDir string) error { + fileMap := make(map[string]string) + + // 遍历目录中的所有文件 + err := filepath.Walk(localDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // 只处理文件,跳过目录 + if !info.IsDir() { + // 计算相对于本地目录的路径 + relPath, err := filepath.Rel(localDir, path) + if err != nil { + return fmt.Errorf("计算相对路径失败: %v", err) + } + + // 
构造COS路径 + cosPath := filepath.Join(cosDir, relPath) + fileMap[path] = cosPath + } + + return nil + }) + + if err != nil { + return fmt.Errorf("遍历目录失败: %v", err) + } + + // 批量上传文件(并发) + return c.BatchUpload(fileMap) +} + +// ConcurrentUploadDirectory 并发上传整个目录到COS(使用COS SDK的并发上传功能) +func (c *COSUtil) ConcurrentUploadDirectory(localDir, cosDir string, workers int) error { + // 创建一个带缓冲的通道来控制并发数 + semaphore := make(chan struct{}, workers) + + // 使用WaitGroup等待所有goroutine完成 + var wg sync.WaitGroup + errChan := make(chan error, 1) // 用于传递错误 + + // 遍历目录中的所有文件 + err := filepath.Walk(localDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // 只处理文件,跳过目录 + if !info.IsDir() { + // 计算相对于本地目录的路径 + relPath, err := filepath.Rel(localDir, path) + if err != nil { + return fmt.Errorf("计算相对路径失败: %v", err) + } + + // 构造COS路径 + cosPath := filepath.Join(cosDir, relPath) + + // 启动goroutine并发上传 + wg.Add(1) + go func(localPath, cosPath string) { + defer wg.Done() + + // 获取信号量 + semaphore <- struct{}{} + defer func() { <-semaphore }() // 释放信号量 + + // 执行上传 + if err := c.UploadFile(localPath, cosPath); err != nil { + // 只发送第一个错误 + select { + case errChan <- fmt.Errorf("上传文件 %s 失败: %v", localPath, err): + default: + } + } + }(path, cosPath) + } + + return nil + }) + + if err != nil { + return fmt.Errorf("遍历目录失败: %v", err) + } + + // 等待所有上传完成 + wg.Wait() + + // 检查是否有错误 + select { + case err := <-errChan: + return err + default: + return nil + } +} diff --git a/src/util/file_util.go b/src/util/file_util.go new file mode 100644 index 0000000..0a142bb --- /dev/null +++ b/src/util/file_util.go @@ -0,0 +1,426 @@ +package util + +import ( + "bufio" + "context" + "crypto/rand" + "encoding/hex" + "fmt" + "io" + "net/http" + "net/url" + "os" + "path/filepath" + "strings" + "sync" + "time" + "unicode" + + "log" + + "github.com/google/uuid" +) + +var ( + rootDir string + rootDirErr error + rootDirOnce sync.Once +) + +// 支持的图片扩展名(小写) +var imageExts = 
map[string]struct{}{ + ".jpg": {}, + ".jpeg": {}, + ".png": {}, + ".gif": {}, + ".bmp": {}, + ".webp": {}, + ".tiff": {}, + ".svg": {}, +} + +// GenerateRandomString generates a random string of the specified length. +func GenerateRandomString(length int) (string, error) { + bytes := make([]byte, length) + _, err := rand.Read(bytes) + if err != nil { + return "", err + } + return hex.EncodeToString(bytes)[:length], nil +} + +// IsEmpty checks if a string is empty. +func IsEmpty(s string) bool { + return len(s) == 0 +} + +// TrimString trims whitespace from the beginning and end of a string. +func TrimString(s string) string { + return strings.TrimSpace(s) +} + +// SplitString 切割字符串,支持空格和中文逗号 +func SplitTags(s string) []string { + // 使用 strings.FieldsFunc 分割字符串 + fields := strings.FieldsFunc(s, func(r rune) bool { + return unicode.IsSpace(r) || r == ',' + }) + + return fields +} + +func SplitTags2(s string) string { + return strings.Join(SplitTags(s), ",") +} + +func getRootDir() (string, error) { + rootDirOnce.Do(func() { + rootDir, rootDirErr = os.Getwd() + }) + return rootDir, rootDirErr +} + +// SaveImageFromURL 保存 URL 图片到本地 +func SaveImageFromURL(imgURL, filePartPath string) (filePath string, err error) { + if strings.HasPrefix(filePartPath, "http") { + return "", fmt.Errorf("保存路径有误") + } + // 1. 获取项目根目录 (只获取一次) + rootDir, err := getRootDir() + if err != nil { + return "", fmt.Errorf("获取项目根目录失败: %w", err) + } + + // 3. 拼接完整文件路径 + fileVirPath := filepath.Join(filePartPath, GetImageNameFromURL(imgURL)) + fileAbsPath := filepath.Join(rootDir, fileVirPath) + + // 4. 创建 media 文件夹(如果不存在) + if err = os.MkdirAll(filepath.Dir(fileAbsPath), 0755); err != nil { + return "", fmt.Errorf("创建 media 目录失败: %w", err) + } + + // 5. 创建目标文件 + file, err := os.Create(fileAbsPath) + if err != nil { + return "", fmt.Errorf("创建文件失败: %w", err) + } + defer func() { + if closeErr := file.Close(); closeErr != nil { + log.Printf("关闭文件失败: %v", closeErr) + } + }() + + // 6. 
发起 HTTP 请求 + client := &http.Client{ + Timeout: 60 * time.Second, // 设置超时时间 + } + req, err := http.NewRequest("GET", imgURL, nil) + if err != nil { + return "", fmt.Errorf("创建 HTTP 请求失败: %w", err) + } + + // 使用 context 增加超时控制 + ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) + defer cancel() + + req = req.WithContext(ctx) + + resp, err := client.Do(req) + if err != nil { + return "", fmt.Errorf("HTTP 请求失败: %w", err) + } + defer func() { + if closeErr := resp.Body.Close(); closeErr != nil { + log.Printf("关闭响应 Body 失败: %v", closeErr) + } + }() + + // 7. 检查 HTTP 状态码 + if resp.StatusCode != http.StatusOK { + return "", fmt.Errorf("HTTP 状态码错误: %d", resp.StatusCode) + } + + // 8. 使用 io.CopyBuffer 高效复制内容 + buffer := make([]byte, 32768) // 32KB 缓冲区 + _, err = io.CopyBuffer(file, resp.Body, buffer) + if err != nil { + return "", fmt.Errorf("复制内容失败: %w", err) + } + + return fileVirPath, nil +} + +// GenerateUUIDFileName 通过 URL 获取文件后缀并生成带 UUID 的文件路径 +func GenerateUUIDFileName(fileURL string) string { + // 1. 解析 URL + parsedURL, err := url.Parse(fileURL) + if err != nil { + return "" + } + + // 2. 获取路径 + path := parsedURL.Path + + // 3. 获取文件后缀 + ext := filepath.Ext(path) + + // 4. 如果没有后缀,则默认为 .html + if ext == "" { + ext = ".png" + } + + // 5. 生成随机 UUID 文件名 + return fmt.Sprintf("%s%s", uuid.New().String(), ext) +} + +// GenerateUUID2FileName 生成随机 UUID 文件名 +func GenerateUUID2FileName(ext string, hasLine bool) string { + // 1. 生成 UUID + uuid := uuid.New().String() + + if !hasLine { + return fmt.Sprintf("%s%s", strings.ReplaceAll(uuid, "-", ""), ext) + } + + // 2. 
拼接文件名 + return fmt.Sprintf("%s%s", uuid, ext) +} + +// GetImageNameFromURL 从给定的图片URL中提取出图片的文件名。 +// 这个函数主要用于处理和解析图片URL,以便获取图片的文件名部分。 +// 参数: +// +// imageURL - 图片的URL地址,字符串类型。 +// +// 返回值: +// +// 返回图片的文件名,如果无法解析或URL格式不正确,则返回空字符串。 +func GetImageNameFromURL(imageURL string) string { + parts := strings.Split(imageURL, "/") + if len(parts) == 0 { + return "" + } + lastPart := parts[len(parts)-1] + if lastPart == "" { + return "" + } + return filepath.Base(lastPart) +} + +// 图片处理器 +type ImageProcessor struct { + srcDir string // 源目录 + dstDir string // 目标目录 + fileExists map[string]int // 记录已存在的文件名及冲突次数 + mu sync.Mutex // 保护fileExists + wg sync.WaitGroup // 等待所有goroutine完成 +} + +// 新建图片处理器 +func NewImageProcessor(srcDir string) *ImageProcessor { + return &ImageProcessor{ + srcDir: srcDir, + dstDir: filepath.Join(srcDir, "merge"), + fileExists: make(map[string]int), + } +} + +// 检查文件是否为图片 +func (p *ImageProcessor) isImage(path string) bool { + ext := strings.ToLower(filepath.Ext(path)) + _, ok := imageExts[ext] + return ok +} + +// 处理单个图片文件 +func (p *ImageProcessor) processImage(srcPath string) { + defer p.wg.Done() + + // 获取文件名(带扩展名) + baseName := filepath.Base(srcPath) + + // 清理掉文件名不符合规范的字符 + titleClear := NewTitleCleaner() + baseName = titleClear.CleanForFileName(baseName) + + // 生成目标路径(处理重名) + dstPath := p.generateDstPath(baseName) + + // 创建目标目录 + dstDir := filepath.Dir(dstPath) + if err := os.MkdirAll(dstDir, 0755); err != nil { + fmt.Printf("错误:无法创建目录 %s: %v\n", dstDir, err) + return + } + + // 直接复制文件,不再尝试硬链接 + if err := p.copyFile(srcPath, dstPath); err != nil { + fmt.Printf("错误:无法复制文件 %s 到 %s: %v\n", srcPath, dstPath, err) + // 删除可能创建的不完整文件 + if _, statErr := os.Stat(dstPath); statErr == nil { + if removeErr := os.Remove(dstPath); removeErr != nil { + fmt.Printf("警告:无法删除不完整文件 %s: %v\n", dstPath, removeErr) + } + } + } +} + +// 复制文件(使用缓冲区提升性能) +func (p *ImageProcessor) copyFile(src, dst string) error { + srcFile, err := os.Open(src) + if err != nil { + return err + 
} + defer srcFile.Close() + + dstFile, err := os.Create(dst) + if err != nil { + return err + } + defer dstFile.Close() + + // 使用带缓冲区的复制(默认缓冲区为120KB) + buf := make([]byte, 120*1024) + writer := bufio.NewWriterSize(dstFile, 120*1024) + reader := bufio.NewReaderSize(srcFile, 120*1024) + written, err := io.CopyBuffer(writer, reader, buf) + if err != nil { + return err + } + if err = writer.Flush(); err != nil { + return err + } + + // 强制刷新缓冲区到磁盘 + if err = dstFile.Sync(); err != nil { + return fmt.Errorf("无法刷新文件缓冲区: %v", err) + } + + // 验证文件大小是否一致 + srcInfo, err := srcFile.Stat() + if err != nil { + return fmt.Errorf("无法获取源文件大小: %v", err) + } + if written != srcInfo.Size() { + return fmt.Errorf("文件复制不完整,预期大小: %d, 实际大小: %d", srcInfo.Size(), written) + } + return nil +} + +// 生成目标路径(处理重名) +func (p *ImageProcessor) generateDstPath(baseName string) string { + p.mu.Lock() + defer p.mu.Unlock() + + // 分离文件名和扩展名 + ext := filepath.Ext(baseName) + name := strings.TrimSuffix(baseName, ext) + + // 检查基础名称是否已存在 + count, exists := p.fileExists[name] + + if exists { + // 已存在,增加计数器 + count++ + p.fileExists[name] = count + // 生成新文件名:name_idx.ext + newName := fmt.Sprintf("%s_%d%s", name, count, ext) + return filepath.Join(p.dstDir, newName) + } else { + // 不存在,记录并返回原始名称 + p.fileExists[name] = 0 + return filepath.Join(p.dstDir, baseName) + } +} + +// 执行合并操作 +func (p *ImageProcessor) Merge() (string, error) { + // 创建merge目录 + if err := os.MkdirAll(p.dstDir, 0755); err != nil { + return "", fmt.Errorf("无法创建merge目录: %v", err) + } + + startTime := time.Now() + fmt.Printf("开始合并分散图片...\n源目录: %s\n目标目录: %s\n", p.srcDir, p.dstDir) + + // 递归遍历目录 + err := filepath.Walk(p.srcDir, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // 跳过media、merge目录及merge目标目录 + if info.IsDir() && (strings.Contains(path, "media") || strings.Contains(path, "merge") || path == p.dstDir) { + return filepath.SkipDir + } + + // 如果是图片文件,启动goroutine处理 + if !info.IsDir() && 
// CopyFile copies the file srcFileName to dstFileName, creating the
// destination directory first when it does not exist. An existing destination
// is overwritten.
//
// The copy is synced to disk and its length is checked against the source
// size. It returns an error when:
//   - the destination directory cannot be created,
//   - the source cannot be opened or inspected,
//   - source and destination are the same file (creating the destination
//     would truncate the source before anything is read),
//   - copying, syncing or closing the destination fails,
//   - fewer or more bytes were written than the source holds.
//
// When an error occurs after the destination was created, the incomplete
// destination file is removed.
func CopyFile(srcFileName, dstFileName string) (err error) {
	if err = os.MkdirAll(filepath.Dir(dstFileName), 0755); err != nil {
		return fmt.Errorf("无法创建目标目录: %w", err)
	}

	srcFile, err := os.Open(srcFileName)
	if err != nil {
		return err
	}
	defer srcFile.Close()

	srcInfo, err := srcFile.Stat()
	if err != nil {
		return fmt.Errorf("无法获取源文件大小: %w", err)
	}

	// os.Create truncates its target, so a self-copy must be rejected before
	// the destination is opened. A failing Stat simply means there is no
	// destination yet.
	if dstInfo, statErr := os.Stat(dstFileName); statErr == nil && os.SameFile(srcInfo, dstInfo) {
		return fmt.Errorf("源文件与目标文件相同: %s", dstFileName)
	}

	dstFile, err := os.Create(dstFileName)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data, so report it unless an earlier
		// error is already being returned.
		if closeErr := dstFile.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup; the original error is the one that matters.
			_ = os.Remove(dstFileName)
		}
	}()

	// io.Copy between two *os.File values needs no extra buffering layers.
	written, err := io.Copy(dstFile, srcFile)
	if err != nil {
		return err
	}

	// Force the data to stable storage before reporting success.
	if err = dstFile.Sync(); err != nil {
		return fmt.Errorf("无法刷新文件缓冲区: %w", err)
	}

	if written != srcInfo.Size() {
		return fmt.Errorf("文件复制不完整,预期大小: %d, 实际大小: %d", srcInfo.Size(), written)
	}
	return nil
}
regexp.MustCompile(`^[【\[][^】\]]+[】\]]`) + // 匹配促销后缀(如:限时折扣、买一送一等) + promoSuffixRegex = regexp.MustCompile(`(限时折扣|特价|促销|优惠|秒杀|买.*送.*|赠品|礼盒装|套装|活动|包邮|含税|整件|酸甜|默认切好|默认开小口)$`) + // 匹配括号内促销内容和规格外的括号内容 + promoInParenthesesRegex = regexp.MustCompile(`[((][^))]*?(促销|优惠|活动|新品|限时|特价|随机搭配|整果|默认|整件|酸甜|天然|纯木本植物原汁|随机)[^))]*?[))]|[((][^))]*?(约|左右|箱|个|件|盒|罐|瓶|毫升|斤|克|g|G)[^))]*?[))]`) + // 匹配特殊字符(保留中文、英文、数字、空格及核心符号:/+-_.()) + specialCharsRegex = regexp.MustCompile(`[^\p{Han}a-zA-Z0-9\s/+-_.()]`) +) + +// fullWidthToHalfWidth 将全角字符转换为半角字符 +// 用于统一字符宽度,确保后续文本处理(如正则匹配)的一致性 +// 参数: +// +// s - 可能包含全角字符的字符串 +// +// 返回值: +// +// 转换为半角字符后的字符串 +func fullWidthToHalfWidth(s string) string { + result := make([]rune, 0, len(s)) + for _, r := range s { + if r >= 65281 && r <= 65374 { + // 全角字符(除空格)转换为半角 + result = append(result, r-65248) + } else if r == 12288 { + // 全角空格转换为半角空格 + result = append(result, 32) + } else { + // 其他字符保持不变 + result = append(result, r) + } + } + return string(result) +} + +// CleanProductName 清洗商品名称 +// CleanProductName 清理电商商品标题,保留核心名称(品牌+产品名+规格) +// 遵循美团/京东/天猫规则:移除促销信息、营销话术、平台标识,保留商品本质特征 +// 参数: +// +// name - 原始商品标题 +// +// 返回值: +// +// 清理后的商品核心名称 +func CleanProductName(name string) string { + // 1. 全角转半角(优先统一字符宽度,避免后续正则匹配问题) + name = fullWidthToHalfWidth(name) + + // 2. 移除常见促销前缀(如:【旗舰店】、【2023新款】等非核心信息) + name = promoPrefixRegex.ReplaceAllString(name, "") + + // 3. 移除促销后缀(如:限时折扣、买一送一等) + name = promoSuffixRegex.ReplaceAllString(name, "") + + // 4. 移除括号内促销内容(保留括号外核心信息) + // 例:小米11(新品上市) → 小米11;iPhone 13(128G/黑色) → iPhone 13(128G/黑色)(保留规格) + name = promoInParenthesesRegex.ReplaceAllString(name, "") + + // 5. 移除特殊字符(保留中文、英文、数字、空格及核心符号:/+-_.()) + name = specialCharsRegex.ReplaceAllString(name, "") + + // 6. 标准化空格(多个空格→单个空格,首尾空格去除) + name = spaceRegex.ReplaceAllString(name, " ") + name = strings.TrimSpace(name) + + // 7. 
移除重复品牌词(如:小米小米11 → 小米11) + if brand := extractBrand(name); brand != "" { + // 使用非贪婪匹配确保只替换开头的重复品牌词 + name = regexp.MustCompile(`^`+regexp.QuoteMeta(brand)+`{2,}`).ReplaceAllString(name, brand) + } + + return name +} + +// CleanProductForFilePath 清理商品名称以适应Linux文件路径规则 +// 将斜杠替换为空格,其他不允许的字符替换为下划线,并确保路径安全 +func CleanProductForFilePath(name string) string { + // 清除空格 + name = strings.ReplaceAll(name, " ", "") + // 将斜杠替换为空字符串 + name = strings.ReplaceAll(name, "/", "") + + // 替换Linux文件系统不允许的特殊字符 + invalidChars := `[\:*?"<>|]` + regex := regexp.MustCompile(invalidChars) + name = regex.ReplaceAllString(name, "") + + // 移除连续下划线 + regex = regexp.MustCompile(`_+`) + name = regex.ReplaceAllString(name, "") + + return strings.Trim(name, "_") +} + +// 中文转拼音 +// ChineseToPinyin 将中文转换为拼音(无声调) +// 用于生成符合文件系统命名规范的名称 +// 参数: +// +// s - 包含中文的字符串 +// +// 返回值: +// +// 转换后的拼音字符串 +func ChineseToPinyin(s string) string { + args := pinyin.NewArgs() + args.Style = pinyin.Normal // 普通风格,不带声调 + return strings.Join(pinyin.LazyConvert(s, &args), "") +} + +// ExtractKeywords 从商品名称中提取关键词 +// 用于图片命名时获取核心识别信息 +// 参数: +// +// name - 清理后的商品名称 +// limit - 最大关键词数量 +// +// 返回值: +// +// 提取的关键词列表 +func ExtractKeywords(name string, limit int) []string { + words := strings.Fields(name) + if len(words) > limit { + return words[:limit] + } + return words +} + +// SanitizeFileName 规范化文件名 +// 移除操作系统不允许的特殊字符,确保文件名合法 +// 参数: +// +// name - 原始文件名 +// +// 返回值: +// +// 安全的文件名 +func SanitizeFileName(name string) string { + // 移除Linux不允许的字符 + invalidChars := `[\/:*?"<>|]` + regex := regexp.MustCompile(invalidChars) + name = regex.ReplaceAllString(name, "_") + + // 移除连续下划线 + regex = regexp.MustCompile(`_+`) + name = regex.ReplaceAllString(name, "_") + + return strings.Trim(name, "_") +} + +// extractBrand 提取商品品牌(用于去重处理) +// 从商品名称开头提取已知品牌,支持中英文品牌名 +// 参数: +// +// name - 清理后的商品名称 +// +// 返回值: +// +// 提取到的品牌名,若未匹配则返回空字符串 +func extractBrand(name string) string { + // 常见品牌词库(可根据业务需求扩展) + brands := []string{"小米", "华为", "苹果", 
// Contains reports whether str is an element of slice. Elements are compared
// with exact string equality; a nil or empty slice yields false.
func Contains(slice []string, str string) bool {
	found := false
	for i := 0; i < len(slice) && !found; i++ {
		found = slice[i] == str
	}
	return found
}
// TitleCleaner bundles the word lists, lookup sets and compiled patterns that
// the title-cleaning methods work with. Instances are built by
// NewTitleCleaner.
type TitleCleaner struct {
	// Phrase lists.
	promotionWords []string // promotional phrases
	shopTypes      []string // shop-type markers

	// Lookup sets, filled through the Add* methods.
	brands          map[string]bool // brand names (multiple languages)
	specUnits       map[string]bool // specification units
	regions         map[string]bool // region / origin words
	freshCategories map[string]bool // fresh-food category words

	// Cached regular expressions.
	reInvalidFileNameChars *regexp.Regexp   // characters that are illegal in file names on Windows and Linux
	reDigits               *regexp.Regexp   // digit runs
	reBrackets             *regexp.Regexp   // bracketed segments including their content
	reSymbols              *regexp.Regexp   // symbols that are stripped from titles
	reSpaces               *regexp.Regexp   // whitespace runs
	reLeadingBrands        *regexp.Regexp   // leading brand-like token
	reSpecPatterns         []*regexp.Regexp // specification patterns (number plus unit, sizes, ranges)
	reFreshPatterns        []*regexp.Regexp // fresh-food category patterns
}
"满减", "秒杀", "包邮", "热卖", "新品", + "特惠", "赠礼", "活动", "爆款", "超值", "福利", "限时", "疯抢", "清仓", + "特价", "大促", "满赠", "限时购", "季末清仓", "限时秒杀", "狂欢价", + "券后", "折后", "今日特惠", "限时价", "会员价", "新客价", "新人专享", + "立减", "直降", "低价", "惊爆价", "冰点价", "吐血价", "大酬宾", "大甩卖", + "超值套餐", "组合优惠", "套装", "礼盒装", "多件优惠", "买一送一", "第二件半价", + "for sale", "discount", "special offer", "promotion", "limited time", "free shipping", + "new arrival", "hot deal", "best price", "flash sale", "bundle", "gift with purchase", + }, + shopTypes: []string{ + "旗舰店", "专卖店", "专营店", "官方店", "自营店", "直销店", + "工厂店", "折扣店", "品牌店", "授权店", "精品店", "加盟店", + "体验店", "便利店", "超市", "卖场", "商城", "全球购", "海外购", + "进口", "直营", "官方", "官方授权", "授权", "官方旗舰店", + "store", "shop", "official", "authorized", "boutique", "premium", + "outlet", "factory", "online", "global", "international", "生鲜", "果蔬", "食品", + }, + brands: make(map[string]bool), + specUnits: make(map[string]bool), + regions: make(map[string]bool), + freshCategories: make(map[string]bool), + reInvalidFileNameChars: regexp.MustCompile(`[\\/:*?"<>|]`), // Windows/Linux均禁止的字符 + reDigits: regexp.MustCompile(`\d+`), + reBrackets: regexp.MustCompile(`\([^)]*\)|\[[^]]*]|<[^>]*>|\{[^}]*\}|([^)]*)|【[^】]*】|〔[^〕]*〕`), + reSymbols: regexp.MustCompile(`[^\p{Han}\p{Latin}0-9xX*×+\-*/\\\s]`), + reSpaces: regexp.MustCompile(`\s+`), + reLeadingBrands: regexp.MustCompile(`^[\p{Han}\p{Latin}0-9]+[、\s\-::]*`), + reSpecPatterns: []*regexp.Regexp{ + regexp.MustCompile(`\d+[a-zA-Z]+`), // 数字+单位,如500ml + regexp.MustCompile(`\d+[xX*×]\d+`), // 数字x数字,如10x20 + regexp.MustCompile(`\d+[xX*×]\d+[xX*×]\d+`), // 三维尺寸,如10x20x30 + regexp.MustCompile(`\d+[+\-*/]\d+`), // 数字+运算符+数字,如12+3 + regexp.MustCompile(`\d+\.\d+[a-zA-Z]+`), // 小数+单位,如3.5kg + regexp.MustCompile(`\d+\.\d+`), // 纯小数,如3.5 + regexp.MustCompile(`\d+/\d+`), // 分数,如1/2 + regexp.MustCompile(`\d+[件盒瓶包箱袋支个只袋卷罐片粒包本张套斤两克千克升毫升箱组台袋条根把捆盒箱罐桶份袋枚]`), // 中文数量单位 + regexp.MustCompile(`\d+[a-zA-Z]+/\d+[a-zA-Z]+`), // 复合单位,如500ml/瓶 + 
regexp.MustCompile(`\d+[a-zA-Z]+[a-zA-Z]+`), // 数字+单位复数,如10grams + regexp.MustCompile(`\d+[a-zA-Z]*\s+[a-zA-Z]+`), // 数字+空格+单位,如10 pieces + regexp.MustCompile(`\d+[至到]\d+[a-zA-Z]+`), // 范围表示,如10-20kg + regexp.MustCompile(`\d+[至到]\d+[件盒瓶包箱袋支个只袋卷罐片粒包本张套斤两克千克升毫升箱组台]`), // 中文范围 + }, + reFreshPatterns: []*regexp.Regexp{ + regexp.MustCompile(`[\p{Han}]+[肉禽蛋鱼海鲜果蔬粮油][\p{Han}]*`), // 生鲜品类识别 + regexp.MustCompile(`[\p{Han}]+[果菜菇豆稻麦米][\p{Han}]*`), // 果蔬品类识别 + regexp.MustCompile(`[\p{Han}]+[猪牛羊鸡鱼虾蟹][\p{Han}]*`), // 肉类识别 + }, + } + + // 添加国内常见品牌 + cleaner.AddBrands([]string{ + "苹果", "华为", "小米", "三星", "耐克", "阿迪达斯", "可口可乐", "立白", "海尔", + "美的", "格力", "五粮液", "茅台", "旺旺", "伊利", "蒙牛", "宝洁", "联合利华", + "双汇", "雨润", "三全", "思念", "太太乐", "王守义", "老干妈", "海天", "李锦记", + "娃哈哈", "康师傅", "统一", "农夫山泉", "蒙牛", "伊利", "光明", "旺旺", "达利园", + "三只松鼠", "良品铺子", "百草味", "来伊份", "海底捞", "好丽友", "雀巢", "奥利奥", + "乐事", "可比克", "喜之郎", "旺旺", "大白兔", "冠生园", "梅林", "王守义", "乌江", + "洽洽", "溜溜梅", "有友", "卫龙", "良品铺子", "三只松鼠", "獐子岛", "北大荒", + "阳澄湖", "恒顺", "太太乐", "加加", "李锦记", "鲁花", "金龙鱼", "福临门", "海天", + }) + + // 添加生鲜、食品类品牌 + cleaner.AddBrands([]string{ + "獐子岛", "正大", "大希地", "恒都", "科尔沁", "皓月", "钱大妈", "盒马", "山姆", + "沃尔玛", "永辉", "家乐福", "麦德龙", "大润发", "物美", "华润万家", "百果园", + "鲜丰水果", "绿叶水果", "每日优鲜", "叮咚买菜", "美团优选", "淘菜菜", "多多买菜", + "海底捞", "德庄", "桥头", "好人家", "天味", "鹃城牌", "饭扫光", "李锦记", "海天", + "鲁花", "金龙鱼", "福临门", "太太乐", "王守义", "老干妈", "乌江", "涪陵", "有友", + }) + + // 添加常见规格单位(增强生鲜类) + cleaner.AddSpecUnits([]string{ + "g", "kg", "mg", "ml", "l", "oz", "lb", "cm", "mm", "m", "inch", "ft", + "件", "盒", "瓶", "包", "箱", "袋", "支", "个", "只", "卷", "罐", "片", "粒", "包", "本", "张", "套", "斤", "两", + "kg", "g", "ml", "l", "cm", "m", "mm", "inch", "ft", "yd", "oz", "lb", + "kilo", "gram", "milliliter", "liter", "centimeter", "meter", "millimeter", + "inch", "foot", "yard", "ounce", "pound", "pack", "box", "case", "bottle", + "piece", "set", "dozen", "pair", "sheet", "roll", "bag", "carton", "portion", + "份", "枚", "条", "根", "把", "捆", "簇", "串", "头", "尾", "块", "朵", 
"颗", "株", + }) + + // 添加常见地域词 + cleaner.AddRegions([]string{ + "山东", "泰国", "进口", "国产", "美国", "日本", "韩国", "欧洲", "澳洲", "新西兰", + "made in usa", "made in japan", "imported", "海外", "全球", "本地", "有机", "无公害", + "绿色", "生态", "天然", "野生", "养殖", "散养", "土生", "本地", "农家", "原生态", + }) + + // 添加生鲜品类词 + cleaner.AddFreshCategories([]string{ + "蔬菜", "水果", "肉类", "海鲜", "水产", "粮油", "干货", "禽蛋", "奶制品", "速冻", + "猪", "牛", "羊", "鸡", "鸭", "鹅", "鱼", "虾", "蟹", "贝", "蛋", "奶", "豆", + "苹果", "香蕉", "橙子", "梨", "葡萄", "草莓", "西瓜", "哈密瓜", "芒果", "菠萝", + "白菜", "萝卜", "土豆", "西红柿", "黄瓜", "茄子", "辣椒", "豆角", "洋葱", "大蒜", + "猪肉", "牛肉", "羊肉", "鸡肉", "鸭肉", "鱼肉", "虾肉", "蟹肉", "贝肉", "鸡蛋", "鸭蛋", + }) + + return cleaner +} + +// 添加品牌到清洗器 +func (c *TitleCleaner) AddBrand(brand string) { + c.brands[strings.ToLower(normalizeString(brand))] = true +} + +// 批量添加品牌 +func (c *TitleCleaner) AddBrands(brands []string) { + for _, brand := range brands { + c.AddBrand(brand) + } +} + +// 添加规格单位 +func (c *TitleCleaner) AddSpecUnit(unit string) { + c.specUnits[strings.ToLower(unit)] = true +} + +// 批量添加规格单位 +func (c *TitleCleaner) AddSpecUnits(units []string) { + for _, unit := range units { + c.AddSpecUnit(unit) + } +} + +// 添加地域词 +func (c *TitleCleaner) AddRegion(region string) { + c.regions[strings.ToLower(normalizeString(region))] = true +} + +// 批量添加地域词 +func (c *TitleCleaner) AddRegions(regions []string) { + for _, region := range regions { + c.AddRegion(region) + } +} + +// 添加生鲜品类词 +func (c *TitleCleaner) AddFreshCategory(category string) { + c.freshCategories[strings.ToLower(normalizeString(category))] = true +} + +// 批量添加生鲜品类词 +func (c *TitleCleaner) AddFreshCategories(categories []string) { + for _, category := range categories { + c.AddFreshCategory(category) + } +} + +// 清洗商品标题(移除冗余信息,保留核心内容) +func (c *TitleCleaner) Clean(title string) string { + // 0. 字符串标准化(处理特殊字符和大小写) + originalTitle := title + title = normalizeString(title) + + // 1. 去除括号及内部内容 + title = c.reBrackets.ReplaceAllString(title, "") + + // 2. 
去除常见促销词 + for _, word := range c.promotionWords { + word = normalizeString(word) + title = strings.ReplaceAll(title, word, "") + } + + // 3. 去除店铺类型标识 + for _, shopType := range c.shopTypes { + shopType = normalizeString(shopType) + title = strings.ReplaceAll(title, shopType, "") + } + + // 4. 去除地域词 + words := strings.Fields(title) + for _, word := range words { + wordLower := strings.ToLower(word) + if c.regions[wordLower] { + title = strings.ReplaceAll(title, word, "") + } + } + + // 5. 处理品牌名(保留品牌名,不做移除处理) + // 注释掉品牌名移除逻辑,以保留品牌信息 + /* + words = strings.Fields(title) + if len(words) > 0 { + firstWord := strings.ToLower(words[0]) + if c.brands[firstWord] { + // 移除开头的品牌名 + title = strings.TrimSpace(strings.TrimPrefix(title, words[0])) + } else if len(words) > 1 { + // 尝试匹配多词品牌 + twoWordBrand := strings.ToLower(words[0] + " " + words[1]) + if c.brands[twoWordBrand] { + title = strings.TrimSpace(strings.TrimPrefix(title, words[0]+" "+words[1])) + } + } + } + */ + + // 6. 处理生鲜品类词(注释掉以保留核心名称中的品类信息) + /* + words = strings.Fields(title) + if len(words) > 0 { + firstWord := strings.ToLower(words[0]) + if c.freshCategories[firstWord] { + // 移除开头的品类词 + title = strings.TrimSpace(strings.TrimPrefix(title, words[0])) + } + } + */ + + // 7. 移除多余符号,保留规格相关符号(如数字、小数点、单位等) + title = c.reSymbols.ReplaceAllString(title, "") + + // 8. 合并连续空格 + title = c.reSpaces.ReplaceAllString(title, " ") + + // 9. 去除首尾空格 + title = strings.TrimSpace(title) + + // 10. 特殊处理:如果清洗后标题为空,返回原始标题的简化版 + if title == "" { + title = normalizeString(originalTitle) + title = c.reBrackets.ReplaceAllString(title, "") + title = c.reSymbols.ReplaceAllString(title, "") + title = c.reSpaces.ReplaceAllString(title, " ") + title = strings.TrimSpace(title) + } + + return title +} + +// 提取规格信息 +func (c *TitleCleaner) ExtractSpec(title string) string { + var specs []string + + // 先尝试使用正则表达式匹配 + for _, re := range c.reSpecPatterns { + specs = append(specs, re.FindAllString(title, -1)...) 
// normalizeString returns s in a canonical form for matching:
//
//   - the marks ™, ® and © are removed;
//   - full-width ASCII variants (U+FF01..U+FF5E) are converted to their
//     half-width counterparts, and the ideographic space U+3000 becomes a
//     plain space;
//   - the result is lower-cased.
//
// All other characters, including ordinary ASCII and CJK text, pass through
// unchanged.
func normalizeString(s string) string {
	// The range bounds are spelled as code points rather than character
	// literals so that the full-width range can never be confused with the
	// look-alike ASCII range '!'..'~'. Shifting an ASCII rune by 0xFEE0 would
	// give a negative value, which strings.Map treats as "drop this rune".
	const (
		fullWidthFirst   = 0xFF01 // FULLWIDTH EXCLAMATION MARK
		fullWidthLast    = 0xFF5E // FULLWIDTH TILDE
		fullWidthShift   = 0xFEE0 // distance between a full-width rune and its ASCII twin
		ideographicSpace = 0x3000
	)

	s = strings.Map(func(r rune) rune {
		switch {
		case r == '\u2122' || r == '\u00ae' || r == '\u00a9': // ™ ® ©
			return -1
		case r == ideographicSpace:
			return ' '
		case r >= fullWidthFirst && r <= fullWidthLast:
			return r - fullWidthShift
		}
		return r
	}, s)

	return strings.ToLower(s)
}