はじめに
Guid を主キーに使っているせいで、現在進行中でインデックスの断片化に悩まされている。 ソート可能な ULID を使えば、この悩みが軽減できたりするんだろうか。 気になったので実験してみた。
以下は実験に使ったサンプル
using System; using System.Data.SqlClient; using System.Threading.Tasks; using System.Linq; using Dapper; namespace UlidVsGuid { class Program { const int Count = 1000; const int DefaultIntervalMilliseconds = 10; static readonly string MasterConnectionString = new SqlConnectionStringBuilder() { DataSource = "(local)", InitialCatalog = "master", IntegratedSecurity = true, }.ToString(); static readonly string ConnectionString = new SqlConnectionStringBuilder() { DataSource = "(local)", InitialCatalog = "ulid_vs_guid", IntegratedSecurity = true, }.ToString(); static async Task Main(string[] args) { DefaultTypeMap.MatchNamesWithUnderscores = true; var intervalMilliseconds = int.TryParse(args.FirstOrDefault(), out var value) ? value : DefaultIntervalMilliseconds; await CleanUpAsync(); await SetUpAsync(); await Task.WhenAll( UlidTestAsync(intervalMilliseconds), GuidTestAsync(intervalMilliseconds), UlidToStringTestAsync(intervalMilliseconds), GuidToStringTestAsync(intervalMilliseconds)); await AverageFragmentationInPercentAsync(); Console.ReadLine(); } static async Task CleanUpAsync() { using (var connection = new SqlConnection(MasterConnectionString)) { await connection.OpenAsync(); await connection.ExecuteAsync( @"IF (DB_ID('ulid_vs_guid') IS NOT NULL) BEGIN DROP DATABASE ulid_vs_guid END"); } } static async Task SetUpAsync() { using (var connection = new SqlConnection(MasterConnectionString)) { await connection.OpenAsync(); await connection.ExecuteAsync( @"CREATE DATABASE [ulid_vs_guid];"); } using (var connection = new SqlConnection(ConnectionString)) { await connection.OpenAsync(); await connection.ExecuteAsync( @"CREATE TABLE [dbo].[ulid_test] ( [id] [uniqueidentifier] NOT NULL, [name] [nvarchar](50) NOT NULL, CONSTRAINT [PK_ulid_test] PRIMARY KEY CLUSTERED ( [id] ASC ) ); CREATE TABLE [dbo].[guid_test] ( [id] [uniqueidentifier] NOT NULL, [name] [nvarchar](50) NOT NULL, CONSTRAINT [PK_guid_test] PRIMARY KEY CLUSTERED ( [id] ASC ) ); CREATE TABLE [dbo].[ulid_str_test] ( [id] [nvarchar](50) NOT NULL, [name] [nvarchar](50) NOT NULL, CONSTRAINT [PK_ulid_str_test] PRIMARY KEY CLUSTERED ( [id] ASC ) ); CREATE TABLE [dbo].[guid_str_test] ( [id] [nvarchar](50) NOT NULL, [name] [nvarchar](50) NOT NULL, CONSTRAINT [PK_guid_str_test] PRIMARY KEY CLUSTERED ( [id] ASC ) ); "); } } static async Task UlidTestAsync(int intervalMilliseconds) { using (var connection = new SqlConnection(ConnectionString)) { await connection.OpenAsync(); for (var i = 0; i < Count; i++) { var id = new Guid(Ulid.NewUlid().ToByteArray()); var name = $"No.{i}"; await connection.ExecuteAsync( @"INSERT INTO ulid_test (id, name) VALUES (@id, @name)", new { id, name, }); await Task.Delay(intervalMilliseconds); } } } static async Task GuidTestAsync(int intervalMilliseconds) { using (var connection = new SqlConnection(ConnectionString)) { await connection.OpenAsync(); for (var i = 0; i < Count; i++) { var id = Guid.NewGuid(); var name = $"No.{i}"; await connection.ExecuteAsync( @"INSERT INTO guid_test (id, name) VALUES (@id, @name)", new { id, name, }); await Task.Delay(intervalMilliseconds); } } } static async Task UlidToStringTestAsync(int intervalMilliseconds) { using (var connection = new SqlConnection(ConnectionString)) { await connection.OpenAsync(); for (var i = 0; i < Count; i++) { var id = Ulid.NewUlid().ToString(); var name = $"No.{i}"; await connection.ExecuteAsync( @"INSERT INTO ulid_str_test (id, name) VALUES (@id, @name)", new { id, name, }); await Task.Delay(intervalMilliseconds); } } } static async Task GuidToStringTestAsync(int intervalMilliseconds) { using (var connection = new SqlConnection(ConnectionString)) { await connection.OpenAsync(); for (var i = 0; i < Count; i++) { var id = Guid.NewGuid().ToString(); var name = $"No.{i}"; await connection.ExecuteAsync( @"INSERT INTO guid_str_test (id, name) VALUES (@id, @name)", new { id, name, }); await Task.Delay(intervalMilliseconds); } } } // インデックスの断片化具合を取得 static async Task AverageFragmentationInPercentAsync() { using (var connection = new SqlConnection(ConnectionString)) { await connection.OpenAsync(); var results = await connection.QueryAsync<Fragmentation>( @"SELECT t2.name AS table_name, t3.name AS index_name, t1.avg_fragmentation_in_percent FROM sys.dm_db_index_physical_stats(DB_ID(), NULL, NULL, NULL, 'LIMITED') AS t1 JOIN sys.all_objects AS t2 ON t1.object_id = t2.object_id JOIN sys.indexes AS t3 ON t1.object_id = t3.object_id AND t1.index_id = t3.index_id WHERE t2.is_ms_shipped = 0 ORDER BY t1.avg_fragmentation_in_percent DESC"); foreach (var result in results) { Console.WriteLine( $"{result.IndexName} = {result.AvgFragmentationInPercent}%"); } } } } class Fragmentation { public string TableName { get; set; } public string IndexName { get; set; } public double AvgFragmentationInPercent { get; set; } } }
下記の4パターンで、1000件登録したときのインデックスの断片化の具合を調べてみた。
- uniqueidentifier 型の主キー列に Guid を格納
- uniqueidentifier 型の主キー列に、Ulid を Guid に変換した値を格納
- nvarchar(50) 型の主キー列に、Guid を string に変換した値を格納
- nvarchar(50) 型の主キー列に、Ulid を string に変換した値を格納
1件登録するたびにインターバルをとっているが、その間隔も 1ms・10ms・100ms・1000ms の4パターンで試してみた。
実験結果は下記の通り
インターバル 1ms のとき

インターバル 10 ms のとき

インターバル 100ms のとき

インターバル 1000ms のとき

まとめ
Ulid は ToString して nvarchar 型の列に格納する必要がある。 互換性あるからといって、Guid に変換して uniqueidentifier 型の列に格納しても効果ない。