Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit c1c2a4cd authored by Fred Isaman's avatar Fred Isaman Committed by Trond Myklebust
Browse files

pnfsblock: add extent manipulation functions



Adds working implementations of various support functions
to handle INVAL extents, needed by writes, such as
bl_mark_sectors_init and bl_is_sector_init.

[pnfsblock: fix 64-bit compiler warnings for extent manipulation]
Signed-off-by: default avatarFred Isaman <iisaman@citi.umich.edu>
Signed-off-by: default avatarBenny Halevy <bhalevy@panasas.com>
Signed-off-by: default avatarBenny Halevy <bhalevy@tonian.com>
[Implement release_inval_marks]
Signed-off-by: default avatarZhang Jingwang <zhangjingwang@nrchpc.ac.cn>
Signed-off-by: default avatarJim Rees <rees@umich.edu>
Signed-off-by: default avatarTrond Myklebust <Trond.Myklebust@netapp.com>
parent 6d742ba5
Loading
Loading
Loading
Loading
+6 −1
Original line number Diff line number Diff line
@@ -78,10 +78,15 @@ release_extents(struct pnfs_block_layout *bl, struct pnfs_layout_range *range)
	spin_unlock(&bl->bl_ext_lock);
}

/* STUB */
static void
release_inval_marks(struct pnfs_inval_markings *marks)
{
	struct pnfs_inval_tracking *pos, *temp;

	list_for_each_entry_safe(pos, temp, &marks->im_tree.mtt_stub, it_link) {
		list_del(&pos->it_link);
		kfree(pos);
	}
	return;
}

+28 −2
Original line number Diff line number Diff line
@@ -38,6 +38,9 @@

#include "../pnfs.h"

#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)

struct block_mount_id {
	spinlock_t			bm_lock;    /* protects list */
	struct list_head		bm_devlist; /* holds pnfs_block_dev */
@@ -56,8 +59,23 @@ enum exstate4 {
	PNFS_BLOCK_NONE_DATA		= 3  /* unmapped, it's a hole */
};

#define MY_MAX_TAGS (15) /* tag bitnums used must be less than this */

struct my_tree {
	sector_t		mtt_step_size;	/* Internal sector alignment */
	struct list_head	mtt_stub; /* Should be a radix tree */
};

struct pnfs_inval_markings {
	/* STUB */
	spinlock_t	im_lock;
	struct my_tree	im_tree;	/* Sectors that need LAYOUTCOMMIT */
	sector_t	im_block_size;	/* Server blocksize in sectors */
};

struct pnfs_inval_tracking {
	struct list_head it_link;
	int		 it_sector;
	int		 it_tags;
};

/* sector_t fields are all in 512-byte sectors */
@@ -76,7 +94,11 @@ struct pnfs_block_extent {
static inline void
BL_INIT_INVAL_MARKS(struct pnfs_inval_markings *marks, sector_t blocksize)
{
	/* STUB */
	spin_lock_init(&marks->im_lock);
	INIT_LIST_HEAD(&marks->im_tree.mtt_stub);
	marks->im_block_size = blocksize;
	marks->im_tree.mtt_step_size = min((sector_t)PAGE_CACHE_SECTORS,
					   blocksize);
}

enum extentclass4 {
@@ -156,8 +178,12 @@ void bl_free_block_dev(struct pnfs_block_dev *bdev);
struct pnfs_block_extent *
bl_find_get_extent(struct pnfs_block_layout *bl, sector_t isect,
		struct pnfs_block_extent **cow_read);
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
			     sector_t offset, sector_t length,
			     sector_t **pages);
void bl_put_extent(struct pnfs_block_extent *be);
struct pnfs_block_extent *bl_alloc_extent(void);
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect);
int bl_add_merge_extent(struct pnfs_block_layout *bl,
			 struct pnfs_block_extent *new);

+253 −0
Original line number Diff line number Diff line
@@ -33,6 +33,259 @@
#include "blocklayout.h"
#define NFSDBG_FACILITY         NFSDBG_PNFS_LD

/* Bit numbers */
#define EXTENT_INITIALIZED 0
#define EXTENT_WRITTEN     1
#define EXTENT_IN_COMMIT   2
#define INTERNAL_EXISTS    MY_MAX_TAGS
#define INTERNAL_MASK      ((1 << INTERNAL_EXISTS) - 1)

/* Returns largest t<=s s.t. t%base==0 */
static inline sector_t normalize(sector_t s, int base)
{
	sector_t tmp = s; /* Since do_div modifies its argument */
	return s - do_div(tmp, base);
}

static inline sector_t normalize_up(sector_t s, int base)
{
	return normalize(s + base - 1, base);
}

/* Complete stub using list while determine API wanted */

/* Returns tags, or negative */
static int32_t _find_entry(struct my_tree *tree, u64 s)
{
	struct pnfs_inval_tracking *pos;

	dprintk("%s(%llu) enter\n", __func__, s);
	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
		if (pos->it_sector > s)
			continue;
		else if (pos->it_sector == s)
			return pos->it_tags & INTERNAL_MASK;
		else
			break;
	}
	return -ENOENT;
}

static inline
int _has_tag(struct my_tree *tree, u64 s, int32_t tag)
{
	int32_t tags;

	dprintk("%s(%llu, %i) enter\n", __func__, s, tag);
	s = normalize(s, tree->mtt_step_size);
	tags = _find_entry(tree, s);
	if ((tags < 0) || !(tags & (1 << tag)))
		return 0;
	else
		return 1;
}

/* Creates entry with tag, or if entry already exists, unions tag to it.
 * If storage is not NULL, newly created entry will use it.
 * Returns number of entries added, or negative on error.
 */
static int _add_entry(struct my_tree *tree, u64 s, int32_t tag,
		      struct pnfs_inval_tracking *storage)
{
	int found = 0;
	struct pnfs_inval_tracking *pos;

	dprintk("%s(%llu, %i, %p) enter\n", __func__, s, tag, storage);
	list_for_each_entry_reverse(pos, &tree->mtt_stub, it_link) {
		if (pos->it_sector > s)
			continue;
		else if (pos->it_sector == s) {
			found = 1;
			break;
		} else
			break;
	}
	if (found) {
		pos->it_tags |= (1 << tag);
		return 0;
	} else {
		struct pnfs_inval_tracking *new;
		if (storage)
			new = storage;
		else {
			new = kmalloc(sizeof(*new), GFP_NOFS);
			if (!new)
				return -ENOMEM;
		}
		new->it_sector = s;
		new->it_tags = (1 << tag);
		list_add(&new->it_link, &pos->it_link);
		return 1;
	}
}

/* XXXX Really want option to not create */
/* Over range, unions tag with existing entries, else creates entry with tag */
static int _set_range(struct my_tree *tree, int32_t tag, u64 s, u64 length)
{
	u64 i;

	dprintk("%s(%i, %llu, %llu) enter\n", __func__, tag, s, length);
	for (i = normalize(s, tree->mtt_step_size); i < s + length;
	     i += tree->mtt_step_size)
		if (_add_entry(tree, i, tag, NULL))
			return -ENOMEM;
	return 0;
}

/* Ensure that future operations on given range of tree will not malloc */
static int _preload_range(struct my_tree *tree, u64 offset, u64 length)
{
	u64 start, end, s;
	int count, i, used = 0, status = -ENOMEM;
	struct pnfs_inval_tracking **storage;

	dprintk("%s(%llu, %llu) enter\n", __func__, offset, length);
	start = normalize(offset, tree->mtt_step_size);
	end = normalize_up(offset + length, tree->mtt_step_size);
	count = (int)(end - start) / (int)tree->mtt_step_size;

	/* Pre-malloc what memory we might need */
	storage = kmalloc(sizeof(*storage) * count, GFP_NOFS);
	if (!storage)
		return -ENOMEM;
	for (i = 0; i < count; i++) {
		storage[i] = kmalloc(sizeof(struct pnfs_inval_tracking),
				     GFP_NOFS);
		if (!storage[i])
			goto out_cleanup;
	}

	/* Now need lock - HOW??? */

	for (s = start; s < end; s += tree->mtt_step_size)
		used += _add_entry(tree, s, INTERNAL_EXISTS, storage[used]);

	/* Unlock - HOW??? */
	status = 0;

 out_cleanup:
	for (i = used; i < count; i++) {
		if (!storage[i])
			break;
		kfree(storage[i]);
	}
	kfree(storage);
	return status;
}

static void set_needs_init(sector_t *array, sector_t offset)
{
	sector_t *p = array;

	dprintk("%s enter\n", __func__);
	if (!p)
		return;
	while (*p < offset)
		p++;
	if (*p == offset)
		return;
	else if (*p == ~0) {
		*p++ = offset;
		*p = ~0;
		return;
	} else {
		sector_t *save = p;
		dprintk("%s Adding %llu\n", __func__, (u64)offset);
		while (*p != ~0)
			p++;
		p++;
		memmove(save + 1, save, (char *)p - (char *)save);
		*save = offset;
		return;
	}
}

/* We are relying on page lock to serialize this */
int bl_is_sector_init(struct pnfs_inval_markings *marks, sector_t isect)
{
	int rv;

	spin_lock(&marks->im_lock);
	rv = _has_tag(&marks->im_tree, isect, EXTENT_INITIALIZED);
	spin_unlock(&marks->im_lock);
	return rv;
}

/* Marks sectors in [offest, offset_length) as having been initialized.
 * All lengths are step-aligned, where step is min(pagesize, blocksize).
 * Notes where partial block is initialized, and helps prepare it for
 * complete initialization later.
 */
/* Currently assumes offset is page-aligned */
int bl_mark_sectors_init(struct pnfs_inval_markings *marks,
			     sector_t offset, sector_t length,
			     sector_t **pages)
{
	sector_t s, start, end;
	sector_t *array = NULL; /* Pages to mark */

	dprintk("%s(offset=%llu,len=%llu) enter\n",
		__func__, (u64)offset, (u64)length);
	s = max((sector_t) 3,
		2 * (marks->im_block_size / (PAGE_CACHE_SECTORS)));
	dprintk("%s set max=%llu\n", __func__, (u64)s);
	if (pages) {
		array = kmalloc(s * sizeof(sector_t), GFP_NOFS);
		if (!array)
			goto outerr;
		array[0] = ~0;
	}

	start = normalize(offset, marks->im_block_size);
	end = normalize_up(offset + length, marks->im_block_size);
	if (_preload_range(&marks->im_tree, start, end - start))
		goto outerr;

	spin_lock(&marks->im_lock);

	for (s = normalize_up(start, PAGE_CACHE_SECTORS);
	     s < offset; s += PAGE_CACHE_SECTORS) {
		dprintk("%s pre-area pages\n", __func__);
		/* Portion of used block is not initialized */
		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
			set_needs_init(array, s);
	}
	if (_set_range(&marks->im_tree, EXTENT_INITIALIZED, offset, length))
		goto out_unlock;
	for (s = normalize_up(offset + length, PAGE_CACHE_SECTORS);
	     s < end; s += PAGE_CACHE_SECTORS) {
		dprintk("%s post-area pages\n", __func__);
		if (!_has_tag(&marks->im_tree, s, EXTENT_INITIALIZED))
			set_needs_init(array, s);
	}

	spin_unlock(&marks->im_lock);

	if (pages) {
		if (array[0] == ~0) {
			kfree(array);
			*pages = NULL;
		} else
			*pages = array;
	}
	return 0;

 out_unlock:
	spin_unlock(&marks->im_lock);
 outerr:
	if (pages) {
		kfree(array);
		*pages = NULL;
	}
	return -ENOMEM;
}

static void print_bl_extent(struct pnfs_block_extent *be)
{
	dprintk("PRINT EXTENT extent %p\n", be);