/**
 * lcnalloc.c - Cluster (de)allocation code. Originated from the Linux-NTFS project.
 *
 * Copyright (c) 2002-2004 Anton Altaparmakov
 * Copyright (c) 2004 Yura Pakhuchiy
 * Copyright (c) 2004-2008 Szabolcs Szakacsits
 * Copyright (c) 2008-2009 Jean-Pierre Andre
 *
 * This program/include file is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as published
 * by the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program/include file is distributed in the hope that it will be
 * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program (in the main directory of the NTFS-3G
 * distribution in the file COPYING); if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
#ifdef HAVE_STDIO_H
#include <stdio.h>
#endif
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif

#include "types.h"
#include "attrib.h"
#include "bitmap.h"
#include "debug.h"
#include "runlist.h"
#include "volume.h"
#include "lcnalloc.h"
#include "logging.h"
#include "misc.h"

/*
 * There are plenty of possibilities for big optimizations all over in the
 * cluster allocation; however, at the moment the dominant bottleneck (~ 90%)
 * is the update of the mapping pairs, whose cost converges to the cubic
 * Faulhaber's formula as a function of the number of extents (fragments,
 * runs).
 */
#define NTFS_LCNALLOC_BSIZE 4096
#define NTFS_LCNALLOC_SKIP NTFS_LCNALLOC_BSIZE

enum {
	ZONE_MFT = 1,
	ZONE_DATA1 = 2,
	ZONE_DATA2 = 4
};

static void ntfs_cluster_set_zone_pos(LCN start, LCN end, LCN *pos, LCN tc)
{
	ntfs_log_trace("pos: %lld tc: %lld\n", (long long)*pos, (long long)tc);

	if (tc >= end)
		*pos = start;
	else if (tc >= start)
		*pos = tc;
}

static void ntfs_cluster_update_zone_pos(ntfs_volume *vol, u8 zone, LCN tc)
{
	ntfs_log_trace("tc = %lld, zone = %d\n", (long long)tc, zone);

	if (zone == ZONE_MFT)
		ntfs_cluster_set_zone_pos(vol->mft_lcn, vol->mft_zone_end,
				&vol->mft_zone_pos, tc);
	else if (zone == ZONE_DATA1)
		ntfs_cluster_set_zone_pos(vol->mft_zone_end, vol->nr_clusters,
				&vol->data1_zone_pos, tc);
	else /* zone == ZONE_DATA2 */
		ntfs_cluster_set_zone_pos(0, vol->mft_zone_start,
				&vol->data2_zone_pos, tc);
}

/*
 * Unmark full zones when a cluster has been freed in a full zone.
 *
 * The next allocation will reuse the freed cluster.
 */
static void update_full_status(ntfs_volume *vol, LCN lcn)
{
	if (lcn >= vol->mft_zone_end) {
		if (vol->full_zones & ZONE_DATA1) {
			ntfs_cluster_update_zone_pos(vol, ZONE_DATA1, lcn);
			vol->full_zones &= ~ZONE_DATA1;
		}
	} else if (lcn < vol->mft_zone_start) {
		if (vol->full_zones & ZONE_DATA2) {
			ntfs_cluster_update_zone_pos(vol, ZONE_DATA2, lcn);
			vol->full_zones &= ~ZONE_DATA2;
		}
	} else {
		if (vol->full_zones & ZONE_MFT) {
			ntfs_cluster_update_zone_pos(vol, ZONE_MFT, lcn);
			vol->full_zones &= ~ZONE_MFT;
		}
	}
}

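/*
 * max_empty_bit_range - find the longest run of zero bits in a bitmap buffer
 * @buf: buffer holding a chunk of the lcn bitmap (one bit per cluster)
 * @size: number of valid bytes in @buf
 *
 * Scan @buf byte by byte and return the bit position (relative to the start
 * of @buf) at which the longest contiguous run of cleared (free) bits begins,
 * or -1 if every bit in @buf is set.  For example, for the two bytes
 * 0xff 0xf0 the longest free run is the four low bits of the second byte,
 * so the returned position is 8.
 */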
static s64 max_empty_bit_range(unsigned char *buf, int size)
{
	int i, j, run = 0;
	int max_range = 0;
	s64 start_pos = -1;

	ntfs_log_trace("Entering\n");

	i = 0;
	while (i < size) {
		switch (*buf) {
		case 0:
			do {
				buf++;
				run += 8;
				i++;
			} while ((i < size) && !*buf);
			break;
		case 255:
			if (run > max_range) {
				max_range = run;
				start_pos = (s64)i * 8 - run;
			}
			run = 0;
			do {
				buf++;
				i++;
			} while ((i < size) && (*buf == 255));
			break;
		default:
			for (j = 0; j < 8; j++) {
				int bit = *buf & (1 << j);

				if (bit) {
					if (run > max_range) {
						max_range = run;
						start_pos = (s64)i * 8 + (j - run);
					}
					run = 0;
				} else
					run++;
			}
			i++;
			buf++;
		}
	}

	if (run > max_range)
		start_pos = (s64)i * 8 - run;

	return start_pos;
}

/*
 * Write the bitmap buffer @b back to the cluster bitmap attribute if it was
 * modified (*@writeback set), clearing the flag.
 * Return 0 on success and -1 on error with errno set to the error code.
 */
static int bitmap_writeback(ntfs_volume *vol, s64 pos, s64 size, void *b,
		u8 *writeback)
{
	s64 written;

	ntfs_log_trace("Entering\n");

	if (!*writeback)
		return 0;

	*writeback = 0;

	written = ntfs_attr_pwrite(vol->lcnbmp_na, pos, size, b);
	if (written != size) {
		if (!written)
			errno = EIO;
		ntfs_log_perror("Bitmap write error (%lld, %lld)",
				(long long)pos, (long long)size);
		return -1;
	}

	return 0;
}

/**
 * ntfs_cluster_alloc - allocate clusters on an ntfs volume
 * @vol: mounted ntfs volume on which to allocate the clusters
 * @start_vcn: vcn to use for the first allocated cluster
 * @count: number of clusters to allocate
 * @start_lcn: starting lcn at which to allocate the clusters (or -1 if none)
 * @zone: zone from which to allocate the clusters
 *
 * Allocate @count clusters preferably starting at cluster @start_lcn or at the
 * current allocator position if @start_lcn is -1, on the mounted ntfs volume
 * @vol. @zone is either DATA_ZONE for allocation of normal clusters or
 * MFT_ZONE for allocation of clusters for the master file table, i.e. the
 * $MFT/$DATA attribute.
 *
 * On success return a runlist describing the allocated cluster(s).
 *
 * On error return NULL with errno set to the error code.
 *
 * Notes on the allocation algorithm
 * =================================
 *
 * There are two data zones.  The first is the area between the end of the mft
 * zone and the end of the volume, and the second is the area between the
 * start of the volume and the start of the mft zone.  On unmodified/standard
 * NTFS 1.x volumes, the second data zone does not exist due to the mft zone
 * being expanded to cover the start of the volume in order to reserve space
 * for the mft bitmap attribute.
 *
 * The complexity stems from the need to implement the mft vs data zoned
 * approach and from the fact that we have access to the lcn bitmap via up to
 * NTFS_LCNALLOC_BSIZE bytes at a time, so we need to cope with crossing over
 * boundaries of two buffers.  Further, the fact that the allocator allows for
 * caller supplied hints as to the location of where allocation should begin
 * and the fact that the allocator keeps track of where in the data zones the
 * next natural allocation should occur, contribute to the complexity of the
 * function.  But it should all be worthwhile, because this allocator:
 * 1) implements MFT zone reservation
 * 2) causes reduction in fragmentation.
 * The code is not optimized for speed.
 */
runlist *ntfs_cluster_alloc(ntfs_volume *vol, VCN start_vcn, s64 count,
		LCN start_lcn, const NTFS_CLUSTER_ALLOCATION_ZONES zone)
{
	LCN zone_start, zone_end;	/* current search range */
	LCN last_read_pos, lcn;
	LCN bmp_pos;			/* current bit position inside the bitmap */
	LCN prev_lcn = 0, prev_run_len = 0;
	s64 clusters, br;
	runlist *rl = NULL, *trl;
	u8 *buf, *byte, bit, writeback;
	u8 pass = 1;		/* 1: inside zone;  2: start of zone */
	u8 search_zone;		/* 4: data2 (start)  1: mft (middle)  2: data1 (end) */
	u8 done_zones = 0;
	u8 has_guess, used_zone_pos;
	int err = 0, rlpos, rlsize, buf_size;

	ntfs_log_enter("Entering with count = 0x%llx, start_lcn = 0x%llx, "
			"zone = %s_ZONE.\n", (long long)count,
			(long long)start_lcn, zone == MFT_ZONE ? "MFT" : "DATA");

	if (!vol || count < 0 || start_lcn < -1 || !vol->lcnbmp_na ||
			(s8)zone < FIRST_ZONE || zone > LAST_ZONE) {
		errno = EINVAL;
		ntfs_log_perror("%s: vcn: %lld, count: %lld, lcn: %lld",
				__FUNCTION__, (long long)start_vcn,
				(long long)count, (long long)start_lcn);
		goto out;
	}

	/* Return an empty runlist if @count == 0. */
	if (!count) {
		rl = ntfs_malloc(0x1000);
		if (rl) {
			rl[0].vcn = start_vcn;
			rl[0].lcn = LCN_RL_NOT_MAPPED;
			rl[0].length = 0;
		}
		goto out;
	}

	buf = ntfs_malloc(NTFS_LCNALLOC_BSIZE);
	if (!buf)
		goto out;
	/*
	 * If no @start_lcn was requested, use the current zone
	 * position, otherwise use the requested @start_lcn.
	 */
	has_guess = 1;
	zone_start = start_lcn;

	if (zone_start < 0) {
		if (zone == DATA_ZONE)
			zone_start = vol->data1_zone_pos;
		else
			zone_start = vol->mft_zone_pos;
		has_guess = 0;
	}

	used_zone_pos = has_guess ? 0 : 1;

	if (!zone_start || zone_start == vol->mft_zone_start ||
			zone_start == vol->mft_zone_end)
		pass = 2;

	if (zone_start < vol->mft_zone_start) {
		zone_end = vol->mft_zone_start;
		search_zone = ZONE_DATA2;
	} else if (zone_start < vol->mft_zone_end) {
		zone_end = vol->mft_zone_end;
		search_zone = ZONE_MFT;
	} else {
		zone_end = vol->nr_clusters;
		search_zone = ZONE_DATA1;
	}

	bmp_pos = zone_start;

	/*
	 * Loop until all clusters are allocated: each iteration reads one
	 * NTFS_LCNALLOC_BSIZE sized chunk of the lcn bitmap and scans it for
	 * free bits.  Pass 1 searches from the current position to the end of
	 * the zone, pass 2 restarts from the beginning of the zone, and an
	 * exhausted zone causes a switch to the next zone.
	 */
	clusters = count;
	rlpos = rlsize = 0;
	while (1) {
		/* Check whether we have exhausted the current zone. */
		if (search_zone & vol->full_zones)
			goto zone_pass_done;
		last_read_pos = bmp_pos >> 3;

		br = ntfs_attr_pread(vol->lcnbmp_na, last_read_pos,
				NTFS_LCNALLOC_BSIZE, buf);
		if (br <= 0) {
			if (!br)
				goto zone_pass_done;
			err = errno;
			ntfs_log_perror("Reading $BITMAP failed");
			goto err_ret;
		}
		/*
		 * We might have read less than NTFS_LCNALLOC_BSIZE bytes
		 * if we are close to the end of the attribute.
		 */
		buf_size = (int)br << 3;
		lcn = bmp_pos & 7;
		bmp_pos &= ~7;
		writeback = 0;

		while (lcn < buf_size) {
			byte = buf + (lcn >> 3);
			bit = 1 << (lcn & 7);
			if (has_guess) {
				if (*byte & bit) {
					has_guess = 0;
					break;
				}
			} else {
				lcn = max_empty_bit_range(buf, br);
				if (lcn < 0)
					break;
				has_guess = 1;
				continue;
			}

			/* First free bit is at lcn + bmp_pos. */

			/* Reallocate memory if necessary. */
			if ((rlpos + 2) * (int)sizeof(runlist) >= rlsize) {
				rlsize += 4096;
				trl = realloc(rl, rlsize);
				if (!trl) {
					err = ENOMEM;
					ntfs_log_perror("realloc() failed");
					goto wb_err_ret;
				}
				rl = trl;
			}

			/* Allocate the bitmap bit. */
			*byte |= bit;
			writeback = 1;
			if (vol->free_clusters <= 0)
				ntfs_log_error("Non-positive free clusters "
						"(%lld)!\n",
						(long long)vol->free_clusters);
			else
				vol->free_clusters--;

			/*
			 * Coalesce with previous run if adjacent LCNs.
			 * Otherwise, append a new run.
			 */
			if (prev_lcn == lcn + bmp_pos - prev_run_len && rlpos) {
				ntfs_log_debug("Cluster coalesce: prev_lcn: "
						"%lld lcn: %lld bmp_pos: %lld "
						"prev_run_len: %lld\n",
						(long long)prev_lcn,
						(long long)lcn,
						(long long)bmp_pos,
						(long long)prev_run_len);
				rl[rlpos - 1].length = ++prev_run_len;
			} else {
				if (rlpos)
					rl[rlpos].vcn = rl[rlpos - 1].vcn +
							prev_run_len;
				else {
					rl[rlpos].vcn = start_vcn;
					ntfs_log_debug("Start_vcn: %lld\n",
							(long long)start_vcn);
				}

				rl[rlpos].lcn = prev_lcn = lcn + bmp_pos;
				rl[rlpos].length = prev_run_len = 1;
				rlpos++;
			}

			ntfs_log_debug("RUN: %-16lld %-16lld %-16lld\n",
					(long long)rl[rlpos - 1].vcn,
					(long long)rl[rlpos - 1].lcn,
					(long long)rl[rlpos - 1].length);

			/* Done? */
			if (!--clusters) {
				if (used_zone_pos)
					ntfs_cluster_update_zone_pos(vol,
							search_zone, lcn +
							bmp_pos + 1 +
							NTFS_LCNALLOC_SKIP);
				goto done_ret;
			}

			lcn++;
		}

		if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback)) {
			err = errno;
			goto err_ret;
		}

		if (!used_zone_pos) {
			used_zone_pos = 1;

			if (search_zone == ZONE_MFT)
				zone_start = vol->mft_zone_pos;
			else if (search_zone == ZONE_DATA1)
				zone_start = vol->data1_zone_pos;
			else
				zone_start = vol->data2_zone_pos;

			if (!zone_start || zone_start == vol->mft_zone_start ||
					zone_start == vol->mft_zone_end)
				pass = 2;
			bmp_pos = zone_start;
		} else
			bmp_pos += buf_size;

		if (bmp_pos < zone_end)
			continue;

zone_pass_done:
		ntfs_log_trace("Finished current zone pass(%i).\n", pass);
		if (pass == 1) {
			pass = 2;
			zone_end = zone_start;

			if (search_zone == ZONE_MFT)
				zone_start = vol->mft_zone_start;
			else if (search_zone == ZONE_DATA1)
				zone_start = vol->mft_zone_end;
			else
				zone_start = 0;

			/* Sanity check. */
			if (zone_end < zone_start)
				zone_end = zone_start;

			bmp_pos = zone_start;
			continue;
		}
		/* pass == 2 */
done_zones_check:
		done_zones |= search_zone;
		vol->full_zones |= search_zone;
		if (done_zones < (ZONE_MFT + ZONE_DATA1 + ZONE_DATA2)) {
			ntfs_log_trace("Switching zone.\n");
			pass = 1;
			if (rlpos) {
				LCN tc = rl[rlpos - 1].lcn +
						rl[rlpos - 1].length +
						NTFS_LCNALLOC_SKIP;

				if (used_zone_pos)
					ntfs_cluster_update_zone_pos(vol,
							search_zone, tc);
			}

			switch (search_zone) {
			case ZONE_MFT:
				ntfs_log_trace("Zone switch: mft -> data1\n");
switch_to_data1_zone:
				search_zone = ZONE_DATA1;
				zone_start = vol->data1_zone_pos;
				zone_end = vol->nr_clusters;
				if (zone_start == vol->mft_zone_end)
					pass = 2;
				break;
			case ZONE_DATA1:
				ntfs_log_trace("Zone switch: data1 -> data2\n");
				search_zone = ZONE_DATA2;
				zone_start = vol->data2_zone_pos;
				zone_end = vol->mft_zone_start;
				if (!zone_start)
					pass = 2;
				break;
			case ZONE_DATA2:
				if (!(done_zones & ZONE_DATA1)) {
					ntfs_log_trace("data2 -> data1\n");
					goto switch_to_data1_zone;
				}
				ntfs_log_trace("Zone switch: data2 -> mft\n");
				search_zone = ZONE_MFT;
				zone_start = vol->mft_zone_pos;
				zone_end = vol->mft_zone_end;
				if (zone_start == vol->mft_zone_start)
					pass = 2;
				break;
			}

			bmp_pos = zone_start;

			if (zone_start == zone_end) {
				ntfs_log_trace("Empty zone, skipped.\n");
				goto done_zones_check;
			}

			continue;
		}

		ntfs_log_trace("All zones are finished, no space on device.\n");
		err = ENOSPC;
		goto err_ret;
	}
done_ret:
	ntfs_log_debug("At done_ret.\n");

	/* Add runlist terminator element. */
	rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
	rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
	rl[rlpos].length = 0;

	if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback)) {
		err = errno;
		goto err_ret;
	}
done_err_ret:
	free(buf);
	if (err) {
		errno = err;
		ntfs_log_perror("Failed to allocate clusters");
		rl = NULL;
	}
out:
	ntfs_log_leave("\n");
	return rl;

wb_err_ret:
	ntfs_log_trace("At wb_err_ret.\n");
	if (bitmap_writeback(vol, last_read_pos, br, buf, &writeback))
		err = errno;
err_ret:
	ntfs_log_trace("At err_ret.\n");
	if (rl) {
		/* Add runlist terminator element. */
		rl[rlpos].vcn = rl[rlpos - 1].vcn + rl[rlpos - 1].length;
		rl[rlpos].lcn = LCN_RL_NOT_MAPPED;
		rl[rlpos].length = 0;
		ntfs_debug_runlist_dump(rl);
		ntfs_cluster_free_from_rl(vol, rl);
		free(rl);
		rl = NULL;
	}
	goto done_err_ret;
}

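/*
 * Illustrative caller-side sketch (not part of the library; the helper
 * do_something_with() and the variable nr are hypothetical): allocate nr
 * clusters from the data zone at the allocator's current position, and give
 * them back with ntfs_cluster_free_from_rl() if a later step fails.
 *
 *	runlist *rl = ntfs_cluster_alloc(vol, 0, nr, -1, DATA_ZONE);
 *	if (!rl)
 *		return -1;			// errno is set, e.g. ENOSPC
 *	if (do_something_with(rl)) {
 *		ntfs_cluster_free_from_rl(vol, rl);	// undo the allocation
 *		free(rl);
 *		return -1;
 *	}
 *	free(rl);
 */
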
/**
 * ntfs_cluster_free_from_rl - free clusters from runlist
 * @vol: mounted ntfs volume on which to free the clusters
 * @rl: runlist from which to deallocate clusters
 *
 * On success return 0 and on error return -1 with errno set to the error code.
 */
int ntfs_cluster_free_from_rl(ntfs_volume *vol, runlist *rl)
{
	s64 nr_freed = 0;
	int ret = -1;

	ntfs_log_trace("Entering.\n");

	for (; rl->length; rl++) {

		ntfs_log_trace("Dealloc lcn 0x%llx, len 0x%llx.\n",
				(long long)rl->lcn, (long long)rl->length);

		if (rl->lcn >= 0) {
			update_full_status(vol, rl->lcn);
			if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn,
					rl->length)) {
				ntfs_log_perror("Cluster deallocation failed "
						"(%lld, %lld)",
						(long long)rl->lcn,
						(long long)rl->length);
				goto out;
			}
			nr_freed += rl->length;
		}
	}

	ret = 0;
out:
	vol->free_clusters += nr_freed;
	if (vol->free_clusters > vol->nr_clusters)
		ntfs_log_error("Too many free clusters (%lld > %lld)!",
				(long long)vol->free_clusters,
				(long long)vol->nr_clusters);
	return ret;
}

/*
 * Basic cluster run free.
 * Returns 0 if successful.
 */
int ntfs_cluster_free_basic(ntfs_volume *vol, s64 lcn, s64 count)
{
	s64 nr_freed = 0;
	int ret = -1;

	ntfs_log_trace("Entering.\n");
	ntfs_log_trace("Dealloc lcn 0x%llx, len 0x%llx.\n",
			(long long)lcn, (long long)count);

	if (lcn >= 0) {
		update_full_status(vol, lcn);
		if (ntfs_bitmap_clear_run(vol->lcnbmp_na, lcn, count)) {
			ntfs_log_perror("Cluster deallocation failed "
					"(%lld, %lld)",
					(long long)lcn, (long long)count);
			goto out;
		}
		nr_freed += count;
	}

	ret = 0;
out:
	vol->free_clusters += nr_freed;
	if (vol->free_clusters > vol->nr_clusters)
		ntfs_log_error("Too many free clusters (%lld > %lld)!",
				(long long)vol->free_clusters,
				(long long)vol->nr_clusters);
	return ret;
}

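/*
 * Illustrative sketch (not part of the library; lcn and count are
 * hypothetical caller variables): undo an allocation whose first cluster
 * and length are known directly, without going through a runlist.
 *
 *	if (ntfs_cluster_free_basic(vol, lcn, count))
 *		ntfs_log_perror("Failed to free the run");
 */
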
/**
 * ntfs_cluster_free - free clusters on an ntfs volume
 * @vol: mounted ntfs volume on which to free the clusters
 * @na: attribute whose runlist describes the clusters to free
 * @start_vcn: vcn in the runlist of @na at which to start freeing clusters
 * @count: number of clusters to free or -1 for all clusters
 *
 * Free @count clusters starting at the cluster @start_vcn in the runlist
 * described by the attribute @na from the mounted ntfs volume @vol.
 *
 * If @count is -1, all clusters from @start_vcn to the end of the runlist
 * are deallocated.
 *
 * On success return the number of deallocated clusters (not counting sparse
 * clusters) and on error return -1 with errno set to the error code.
 */
int ntfs_cluster_free(ntfs_volume *vol, ntfs_attr *na, VCN start_vcn, s64 count)
{
	runlist *rl;
	s64 delta, to_free, nr_freed = 0;
	int ret = -1;

	if (!vol || !vol->lcnbmp_na || !na || start_vcn < 0 ||
			(count < 0 && count != -1)) {
		ntfs_log_trace("Invalid arguments!\n");
		errno = EINVAL;
		return -1;
	}

	ntfs_log_enter("Entering for inode 0x%llx, attr 0x%x, count 0x%llx, "
			"vcn 0x%llx.\n", (unsigned long long)na->ni->mft_no,
			na->type, (long long)count, (long long)start_vcn);

	rl = ntfs_attr_find_vcn(na, start_vcn);
	if (!rl) {
		if (errno == ENOENT)
			ret = 0;
		goto leave;
	}

	if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
		errno = EIO;
		ntfs_log_perror("%s: Unexpected lcn (%lld)", __FUNCTION__,
				(long long)rl->lcn);
		goto leave;
	}

	/* Find the starting cluster inside the run that needs freeing. */
	delta = start_vcn - rl->vcn;

	/* The number of clusters in this run that need freeing. */
	to_free = rl->length - delta;
	if (count >= 0 && to_free > count)
		to_free = count;

	if (rl->lcn != LCN_HOLE) {
		/* Do the actual freeing of the clusters in this run. */
		update_full_status(vol, rl->lcn + delta);
		if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn + delta,
				to_free))
			goto leave;
		nr_freed = to_free;
	}

	/* Go to the next run and adjust the number of clusters left to free. */
	++rl;
	if (count >= 0)
		count -= to_free;

	/*
	 * Loop over the remaining runs, using @count as a capping value, and
	 * free them.
	 */
	for (; rl->length && count != 0; ++rl) {
		// FIXME: Need to try ntfs_attr_map_runlist() for attribute
		// list support! (AIA)
		if (rl->lcn < 0 && rl->lcn != LCN_HOLE) {
			// FIXME: Eeek! We need rollback! (AIA)
			errno = EIO;
			ntfs_log_perror("%s: Invalid lcn (%lli)", __FUNCTION__,
					(long long)rl->lcn);
			goto out;
		}

		/* The number of clusters in this run that need freeing. */
		to_free = rl->length;
		if (count >= 0 && to_free > count)
			to_free = count;

		if (rl->lcn != LCN_HOLE) {
			update_full_status(vol, rl->lcn);
			if (ntfs_bitmap_clear_run(vol->lcnbmp_na, rl->lcn,
					to_free)) {
				// FIXME: Eeek! We need rollback! (AIA)
				ntfs_log_perror("%s: Clearing bitmap run failed",
						__FUNCTION__);
				goto out;
			}
			nr_freed += to_free;
		}

		if (count >= 0)
			count -= to_free;
	}

	if (count != -1 && count != 0) {
		// FIXME: Eeek! BUG()
		errno = EIO;
		ntfs_log_perror("%s: count still not zero (%lld)", __FUNCTION__,
				(long long)count);
		goto out;
	}

	ret = nr_freed;
out:
	vol->free_clusters += nr_freed;
	if (vol->free_clusters > vol->nr_clusters)
		ntfs_log_error("Too many free clusters (%lld > %lld)!",
				(long long)vol->free_clusters,
				(long long)vol->nr_clusters);
leave:
	ntfs_log_leave("\n");
	return ret;
}
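
/*
 * Illustrative sketch (not part of the library; vol, na and new_vcn are
 * hypothetical caller variables): release every cluster of attribute na
 * from vcn new_vcn to the end of its runlist, as done when truncating.
 *
 *	if (ntfs_cluster_free(vol, na, new_vcn, -1) < 0)
 *		ntfs_log_perror("Failed to free clusters");
 */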