Bulkdelete 高效,稳定删除上亿条数据。

以下是一个基于 Entity Framework Extensions 的 BulkDelete 高效删除海量数据的方案,包含性能优化、稳定性保障和错误处理机制:

csharp
using System;
using System.Linq;
using Z.EntityFramework.Plus;

public class BulkDeleteService
{
private const int BatchSize = 50000; // 根据服务器内存和网络调整
private const int MaxRetryAttempts = 3;
private readonly TimeSpan RetryDelay = TimeSpan.FromSeconds(5);

public void BulkDeleteRecords(MyDbContext context, DateTime deleteBefore)
{
    var query = context.YourEntities
        .Where(x => x.CreatedDate < deleteBefore)
        .OrderBy(x => x.Id); // 按主键排序确保分页稳定

    int totalDeleted = 0;
    bool hasMoreData = true;
    
    var deleteOptions = new BulkDeleteOptions
    {
        BatchSize = BatchSize,
        UseTransaction = false, // 关闭事务提升性能
        CommandTimeout = 600,    // 适当延长超时时间
        ExecuteHook = (x) => 
        {
            // 内存优化配置
            x.BatchTimeout = 60;
            x.TrackingEntities = false;
        }
    };

    while (hasMoreData)
    {
        try
        {
            // 分页删除(使用子查询确保分页准确)
            var subQuery = query.Take(BatchSize);
            int deletedCount = RetryPolicy(() => subQuery.DeleteFromQuery(deleteOptions));

            totalDeleted += deletedCount;
            hasMoreData = deletedCount == BatchSize;

            Console.WriteLine($"Deleted {totalDeleted:N0} records, LastID: {GetLastProcessedId(context)}");
            
            // 定期释放资源
            if (totalDeleted % (BatchSize * 10) == 0)
            {
                context.Dispose();
                context = new MyDbContext();
                query = context.YourEntities.Where(...); // 重建查询
            }
        }
        catch (Exception ex)
        {
            LogError(ex, totalDeleted);
            throw; // 根据业务需求决定是否终止
        }
    }
}

private int RetryPolicy(Func<int> action)
{
    int attempts = 0;
    while (true)
    {
        try
        {
            return action();
        }
        catch (SqlException ex) when (attempts < MaxRetryAttempts)
        {
            if (IsTransientError(ex.Number))
            {
                attempts++;
                Thread.Sleep(RetryDelay);
                continue;
            }
            throw;
        }
    }
}

private bool IsTransientError(int errorNumber)
{
    // SQL Server 瞬态错误代码列表
    int transientErrors = { 4060, 40197, 40501, 40613, 49918, 49919, 49920, 4221 };
    return transientErrors.Contains(errorNumber);
}

private long GetLastProcessedId(MyDbContext context)
{
    return context.YourEntities
        .OrderByDescending(x => x.Id)
        .Select(x => x.Id)
        .FirstOrDefault();
}

private void LogError(Exception ex, int processedCount)
{
    // 记录错误和当前进度到持久化存储
    File.AppendAllText("delete_log.txt", 
        $"{DateTime.UtcNow:u} Error after {processedCount} records: {ex}\n");
}

}

关键优化点:

  1. 分页策略优化:
  • 使用 Take(BatchSize).DeleteFromQuery() 确保准确分页
  • 按主键排序避免数据重复或遗漏
  • 通过子查询实现稳定分页
  1. 性能增强:
  • 禁用实体跟踪 (TrackingEntities = false)
  • 批量提交而非事务包裹整个操作
  • 定期重建 DbContext 释放内存
  • 调整批量大小(建议5万-10万/批)
  1. 稳定性保障:
  • 瞬态错误重试机制(支持SQL超时、连接中断等)
  • 进度记录到文件(支持断点续删)
  • 独立的错误日志记录
  • 内存泄漏预防(定期释放上下文)
  1. 数据库优化建议:
    sql
    — 执行前建议操作
    ALTER DATABASE YourDB SET RECOVERY SIMPLE;
    EXEC sp_configure ‘max server memory’, ‘4096’; — 根据服务器调整
    ALTER DATABASE YourDB SET DELAYED_DURABILITY = FORCED;

— 执行后建议
UPDATE STATISTICS YourTable WITH FULLSCAN;
ALTER INDEX ALL ON YourTable REBUILD;

  1. 执行策略:
  • 建议在低峰时段执行
  • 提前备份目标数据
  • 使用数据库镜像/AlwaysOn在从库执行
  • 监控锁状态(sp_who2, sys.dm_tran_locks)
  1. 高级方案:
    csharp
    // 分区并行删除(需要物理分区)
    Parallel.ForEach(GetPartitionRanges(), range =>
    {
    using var ctx = new MyDbContext();
    ctx.YourEntities
    .Where(x => x.Id > range.Start && x.Id <= range.End)
    .DeleteFromQuery();
    });

// 时间范围分段删除
var dateRanges = GenerateDateChunks(deleteBefore);
foreach (var range in dateRanges)
{
context.YourEntities
.Where(x => x.CreatedDate >= range.Start && x.CreatedDate < range.End)
.DeleteFromQuery();
}

注意事项:

  1. 建议先在测试环境验证删除逻辑
  2. 确保删除条件字段有合适的索引(CreatedDate或ID)
  3. 监控事务日志增长(特别是FULL恢复模式)
  4. 考虑使用Table Partitioning处理持续的大数据删除

对于超大规模数据(超过1亿条),建议结合数据库原生工具(如SQL Server的Partition Switching)实现秒级数据删除。

来源链接:https://www.cnblogs.com/jtxs/p/18854893

© 版权声明
THE END
支持一下吧
点赞10 分享
评论 抢沙发
头像
请文明发言!
提交
头像

昵称

取消
昵称表情代码快捷回复

    暂无评论内容