diff --git a/.github/workflows/generate_missing_mappings.yml b/.github/workflows/generate_missing_mappings.yml deleted file mode 100644 index 3fc539b72..000000000 --- a/.github/workflows/generate_missing_mappings.yml +++ /dev/null @@ -1,79 +0,0 @@ -name: Generate and commit mapping changes to requested PR - -on: - issue_comment: - types: [created] - -jobs: - generate_missing_mappings: - runs-on: shopify-ubuntu-latest - if: ${{github.event.issue.pull_request && github.event.comment.body == '/generate_mappings'}} - - steps: - - uses: actions/checkout@v4 - - name: Checkout PR - run: gh pr checkout $ISSUE - env: - GH_TOKEN: ${{ secrets.GITHUB_TOKEN }} - ISSUE: ${{ github.event.issue.html_url }} - - uses: ruby/setup-ruby@v1 - with: - bundler-cache: true - - name: Set up Podman - run: | - sudo apt-get update - sudo apt-get -y install podman - - name: Launch Qdrant server - run: | - port=6333 - podman run -d -p ${port}:${port} qdrant/qdrant - echo "Waiting for Qdrant server to be ready..." - sleep 25 - - uses: cue-lang/setup-cue@v1.0.0 - with: - version: 'v0.7.0' - - name: Generate distribution files to gather all published mappings in in json format - run: make VERBOSE=1 - - name: Generate missing mappings - env: - OPENAI_API_TOKEN: ${{ secrets.OPENAI_API_TOKEN }} - run: | - bin/generate_missing_mappings - - name: Clean cache and re-generate mapping distribution files - run: | - make clean - make VERBOSE=1 - - name: Commit mapping changes - id: commit_changes - run: | - git config --global user.name "github-actions[bot]" - git config --global user.email "github-actions[bot]@users.noreply.github.com" - git add dist/*/integrations - git add data/integrations - commit_output=$(git commit -m "🤖 Update missing mappings" || echo "No changes to commit") - echo "$commit_output" - echo "::set-output name=commit_message::$commit_output" - git push || echo "No changes to push" - - name: Read disagree mapping entries - id: read_content - run: | - if [ -f tmp/mapping_update_message.txt ]; then - meaningful_content=$(cat tmp/mapping_update_message.txt) - else - meaningful_content="No meaningful content provided." - fi - echo "meaningful_content<> $GITHUB_ENV - echo "$meaningful_content" >> $GITHUB_ENV - echo "EOF" >> $GITHUB_ENV - - name: Add mapping grading comment to PR - if: steps.commit_changes.outputs.commit_message != 'No changes to commit' - uses: actions/github-script@v6 - with: - script: | - const body = process.env.meaningful_content; - github.rest.issues.createComment({ - issue_number: ${{ github.event.issue.number }}, - owner: context.repo.owner, - repo: context.repo.repo, - body: body - }) diff --git a/Gemfile b/Gemfile index 1456d37b1..ff607eb5f 100644 --- a/Gemfile +++ b/Gemfile @@ -21,7 +21,6 @@ gem "tty-option", "~> 0.3" # generate taxonomy mappings gem "qdrant-ruby" -gem "bigdecimal" gem "ruby-openai" group :development, :test do diff --git a/Gemfile.lock b/Gemfile.lock index 0e51c9558..2d3cbf1ab 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -356,7 +356,6 @@ PLATFORMS x86_64-linux-musl DEPENDENCIES - bigdecimal bootsnap cli-ui (~> 2.2) debug diff --git a/bin/generate_missing_mappings b/bin/generate_missing_mappings index 6f8bd5281..8fd4d0990 100755 --- a/bin/generate_missing_mappings +++ b/bin/generate_missing_mappings @@ -3,10 +3,6 @@ require_relative "../config/environment" require "qdrant" -require "bigdecimal" -require "openai" -require "csv" -require "yaml" class TaxonomyMapper MAX_RETRIES = 3 @@ -21,8 +17,7 @@ class TaxonomyMapper end def run - mappings_json_data = System.new.parse_json("dist/en/integrations/all_mappings.json") - shopify_categories_missing_mapping_groups = find_unmapped_shopify_categories(mappings_json_data) + shopify_categories_missing_mapping_groups = find_unmapped_shopify_categories return if shopify_categories_missing_mapping_groups.empty? shopify_categories_missing_mapping_groups.each do |missing_mapping_group| @@ -54,61 +49,31 @@ class TaxonomyMapper puts "Started Qdrant server in the background with PID #{pid}." end - def find_unmapped_shopify_categories(mappings_json_data) + def find_unmapped_shopify_categories shopify_categories_lack_mappings = [] - mappings_json_data["mappings"].each do |mapping| - next unless mapping["input_taxonomy"] == LATEST_SHOPIFY_VERSION - - all_shopify_category_ids = category_ids_from_taxonomy(mapping["input_taxonomy"]) - next if all_shopify_category_ids.nil? - - shopify_category_ids_from_mappings_input = mapping["rules"] - .map { |rule| rule.dig("input", "category", "id") } - .to_set - + all_shopify_category_ids = Set.new(Category.all.pluck(:id)) + MappingRule.where(input_version: LATEST_SHOPIFY_VERSION).group_by(&:output_version).each do |output_version, mappings| + shopify_category_ids_from_mappings_input = Set.new( + mappings.map do |mapping| + mapping.input.product_category_id.split("/").last + end, + ) unmapped_category_ids = all_shopify_category_ids - shopify_category_ids_from_mappings_input - unmapped_category_ids.map! { |id| id.split("/").last } - category_ids_full_names = get_category_full_names(unmapped_category_ids) - + category_ids_full_names = unmapped_category_ids.sort.map do |id| + category_full_name = Category.find(id)&.full_name + [id, category_full_name] if category_full_name + end.compact.to_h next if category_ids_full_names.empty? shopify_categories_lack_mappings << { - input_taxonomy: mapping["input_taxonomy"], - output_taxonomy: mapping["output_taxonomy"], + input_taxonomy: mappings.first.input_version, + output_taxonomy: output_version, category_ids_full_names: category_ids_full_names, } end shopify_categories_lack_mappings end - def category_ids_from_taxonomy(input_or_output_taxonomy) - if input_or_output_taxonomy.include?("shopify") && !input_or_output_taxonomy.include?("shopify/2022-02") - shopify_category_ids_from_json - else - channel_category_ids_from_yaml(input_or_output_taxonomy) - end - end - - def shopify_category_ids_from_json - categories_json_data = System.new.parse_json("dist/en/categories.json") - categories_json_data["verticals"].flat_map do |vertical| - vertical["categories"].map { |category| category["id"] } - end.to_set - end - - def channel_category_ids_from_yaml(taxonomy) - file_path = "data/integrations/#{taxonomy}/full_names.yml" - channel_taxonomy = System.new.parse_yaml(file_path) - channel_taxonomy.map { |entry| entry["id"].to_s }.to_set - end - - def get_category_full_names(category_ids) - category_ids.each_with_object({}) do |id, hash| - category_full_name = Category.find(id)&.full_name - hash[id] = category_full_name if category_full_name - end - end - def load_embedding_data(output_taxonomy) files = Dir.glob(File.join("data/integrations/#{output_taxonomy}/embeddings", "_*.txt")) files.each_with_object({}) do |partition, embedding_data|