Donate to e Foundation | Murena handsets with /e/OS | Own a part of Murena! Learn more

Commit 0a4ebed7 authored by Linus Torvalds's avatar Linus Torvalds
Browse files
* 'upstream-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jlbec/ocfs2: (31 commits)
  ocfs2: avoid unaligned access to dqc_bitmap
  ocfs2: Use filemap_write_and_wait() instead of write_inode_now()
  ocfs2: honor O_(D)SYNC flag in fallocate
  ocfs2: Add a missing journal credit in ocfs2_link_credits() -v2
  ocfs2: send correct UUID to cleancache initialization
  ocfs2: Commit transactions in error cases -v2
  ocfs2: make direntry invalid when deleting it
  fs/ocfs2/dlm/dlmlock.c: free kmem_cache_zalloc'd data using kmem_cache_free
  ocfs2: Avoid livelock in ocfs2_readpage()
  ocfs2: serialize unaligned aio
  ocfs2: Implement llseek()
  ocfs2: Fix ocfs2_page_mkwrite()
  ocfs2: Add comment about orphan scanning
  ocfs2: Clean up messages in the fs
  ocfs2/cluster: Cluster up now includes network connections too
  ocfs2/cluster: Add new function o2net_fill_node_map()
  ocfs2/cluster: Fix output in file elapsed_time_in_ms
  ocfs2/dlm: dlmlock_remote() needs to account for remastery
  ocfs2/dlm: Take inflight reference count for remotely mastered resources too
  ocfs2/dlm: Cleanup dlm_wait_for_node_death() and dlm_wait_for_node_recovery()
  ...
parents 3b120ab7 93925579
Loading
Loading
Loading
Loading
+1 −1
Original line number Original line Diff line number Diff line
@@ -5699,7 +5699,7 @@ int ocfs2_remove_btree_range(struct inode *inode,
					   OCFS2_JOURNAL_ACCESS_WRITE);
					   OCFS2_JOURNAL_ACCESS_WRITE);
	if (ret) {
	if (ret) {
		mlog_errno(ret);
		mlog_errno(ret);
		goto out;
		goto out_commit;
	}
	}


	dquot_free_space_nodirty(inode,
	dquot_free_space_nodirty(inode,
+61 −8
Original line number Original line Diff line number Diff line
@@ -290,7 +290,15 @@ static int ocfs2_readpage(struct file *file, struct page *page)
	}
	}


	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
	if (down_read_trylock(&oi->ip_alloc_sem) == 0) {
		/*
		 * Unlock the page and cycle ip_alloc_sem so that we don't
		 * busyloop waiting for ip_alloc_sem to unlock
		 */
		ret = AOP_TRUNCATED_PAGE;
		ret = AOP_TRUNCATED_PAGE;
		unlock_page(page);
		unlock = 0;
		down_read(&oi->ip_alloc_sem);
		up_read(&oi->ip_alloc_sem);
		goto out_inode_unlock;
		goto out_inode_unlock;
	}
	}


@@ -563,6 +571,7 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
{
{
	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
	struct inode *inode = iocb->ki_filp->f_path.dentry->d_inode;
	int level;
	int level;
	wait_queue_head_t *wq = ocfs2_ioend_wq(inode);


	/* this io's submitter should not have unlocked this before we could */
	/* this io's submitter should not have unlocked this before we could */
	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
	BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
@@ -570,6 +579,15 @@ static void ocfs2_dio_end_io(struct kiocb *iocb,
	if (ocfs2_iocb_is_sem_locked(iocb))
	if (ocfs2_iocb_is_sem_locked(iocb))
		ocfs2_iocb_clear_sem_locked(iocb);
		ocfs2_iocb_clear_sem_locked(iocb);


	if (ocfs2_iocb_is_unaligned_aio(iocb)) {
		ocfs2_iocb_clear_unaligned_aio(iocb);

		if (atomic_dec_and_test(&OCFS2_I(inode)->ip_unaligned_aio) &&
		    waitqueue_active(wq)) {
			wake_up_all(wq);
		}
	}

	ocfs2_iocb_clear_rw_locked(iocb);
	ocfs2_iocb_clear_rw_locked(iocb);


	level = ocfs2_iocb_rw_locked_level(iocb);
	level = ocfs2_iocb_rw_locked_level(iocb);
@@ -862,6 +880,12 @@ struct ocfs2_write_ctxt {
	struct page			*w_pages[OCFS2_MAX_CTXT_PAGES];
	struct page			*w_pages[OCFS2_MAX_CTXT_PAGES];
	struct page			*w_target_page;
	struct page			*w_target_page;


	/*
	 * w_target_locked is used for page_mkwrite path indicating no unlocking
	 * against w_target_page in ocfs2_write_end_nolock.
	 */
	unsigned int			w_target_locked:1;

	/*
	/*
	 * ocfs2_write_end() uses this to know what the real range to
	 * ocfs2_write_end() uses this to know what the real range to
	 * write in the target should be.
	 * write in the target should be.
@@ -895,6 +919,24 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)


static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
{
{
	int i;

	/*
	 * w_target_locked is only set to true in the page_mkwrite() case.
	 * The intent is to allow us to lock the target page from write_begin()
	 * to write_end(). The caller must hold a ref on w_target_page.
	 */
	if (wc->w_target_locked) {
		BUG_ON(!wc->w_target_page);
		for (i = 0; i < wc->w_num_pages; i++) {
			if (wc->w_target_page == wc->w_pages[i]) {
				wc->w_pages[i] = NULL;
				break;
			}
		}
		mark_page_accessed(wc->w_target_page);
		page_cache_release(wc->w_target_page);
	}
	ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
	ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);


	brelse(wc->w_di_bh);
	brelse(wc->w_di_bh);
@@ -1132,20 +1174,17 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
			 */
			 */
			lock_page(mmap_page);
			lock_page(mmap_page);


			/* Exit and let the caller retry */
			if (mmap_page->mapping != mapping) {
			if (mmap_page->mapping != mapping) {
				WARN_ON(mmap_page->mapping);
				unlock_page(mmap_page);
				unlock_page(mmap_page);
				/*
				ret = -EAGAIN;
				 * Sanity check - the locking in
				 * ocfs2_pagemkwrite() should ensure
				 * that this code doesn't trigger.
				 */
				ret = -EINVAL;
				mlog_errno(ret);
				goto out;
				goto out;
			}
			}


			page_cache_get(mmap_page);
			page_cache_get(mmap_page);
			wc->w_pages[i] = mmap_page;
			wc->w_pages[i] = mmap_page;
			wc->w_target_locked = true;
		} else {
		} else {
			wc->w_pages[i] = find_or_create_page(mapping, index,
			wc->w_pages[i] = find_or_create_page(mapping, index,
							     GFP_NOFS);
							     GFP_NOFS);
@@ -1160,6 +1199,8 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
			wc->w_target_page = wc->w_pages[i];
			wc->w_target_page = wc->w_pages[i];
	}
	}
out:
out:
	if (ret)
		wc->w_target_locked = false;
	return ret;
	return ret;
}
}


@@ -1817,11 +1858,23 @@ try_again:
	 */
	 */
	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
	ret = ocfs2_grab_pages_for_write(mapping, wc, wc->w_cpos, pos, len,
					 cluster_of_pages, mmap_page);
					 cluster_of_pages, mmap_page);
	if (ret) {
	if (ret && ret != -EAGAIN) {
		mlog_errno(ret);
		mlog_errno(ret);
		goto out_quota;
		goto out_quota;
	}
	}


	/*
	 * ocfs2_grab_pages_for_write() returns -EAGAIN if it could not lock
	 * the target page. In this case, we exit with no error and no target
	 * page. This will trigger the caller, page_mkwrite(), to re-try
	 * the operation.
	 */
	if (ret == -EAGAIN) {
		BUG_ON(wc->w_target_page);
		ret = 0;
		goto out_quota;
	}

	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
	ret = ocfs2_write_cluster_by_desc(mapping, data_ac, meta_ac, wc, pos,
					  len);
					  len);
	if (ret) {
	if (ret) {
+14 −0
Original line number Original line Diff line number Diff line
@@ -78,6 +78,7 @@ enum ocfs2_iocb_lock_bits {
	OCFS2_IOCB_RW_LOCK = 0,
	OCFS2_IOCB_RW_LOCK = 0,
	OCFS2_IOCB_RW_LOCK_LEVEL,
	OCFS2_IOCB_RW_LOCK_LEVEL,
	OCFS2_IOCB_SEM,
	OCFS2_IOCB_SEM,
	OCFS2_IOCB_UNALIGNED_IO,
	OCFS2_IOCB_NUM_LOCKS
	OCFS2_IOCB_NUM_LOCKS
};
};


@@ -91,4 +92,17 @@ enum ocfs2_iocb_lock_bits {
	clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
	clear_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
#define ocfs2_iocb_is_sem_locked(iocb) \
#define ocfs2_iocb_is_sem_locked(iocb) \
	test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)
	test_bit(OCFS2_IOCB_SEM, (unsigned long *)&iocb->private)

/*
 * Accessors for the OCFS2_IOCB_UNALIGNED_IO bit stored in iocb->private.
 *
 * ocfs2_iocb_set_unaligned_aio(iocb)   - set the bit
 * ocfs2_iocb_clear_unaligned_aio(iocb) - clear the bit
 * ocfs2_iocb_is_unaligned_aio(iocb)    - test the bit (non-zero when set)
 *
 * ocfs2_dio_end_io() tests and clears this bit before dropping the
 * inode's ip_unaligned_aio count.
 *
 * The iocb argument is parenthesized so that any pointer expression
 * (not just a plain identifier) expands correctly.
 */
#define ocfs2_iocb_set_unaligned_aio(iocb) \
	set_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&(iocb)->private)
#define ocfs2_iocb_clear_unaligned_aio(iocb) \
	clear_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&(iocb)->private)
#define ocfs2_iocb_is_unaligned_aio(iocb) \
	test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&(iocb)->private)

/*
 * Hashed table of wait queue heads.
 *
 * ocfs2_ioend_wq(v) casts v to unsigned long, reduces it modulo
 * OCFS2_IOEND_WQ_HASH_SZ and returns a pointer to that slot of
 * ocfs2__ioend_wq[].  Different values of v can therefore share a
 * queue.  ocfs2_dio_end_io() looks the queue up with the inode pointer.
 *
 * The array itself is only declared here; its definition is not in
 * this header.
 */
#define OCFS2_IOEND_WQ_HASH_SZ	37
#define ocfs2_ioend_wq(v)   (&ocfs2__ioend_wq[((unsigned long)(v)) %\
					    OCFS2_IOEND_WQ_HASH_SZ])
extern wait_queue_head_t ocfs2__ioend_wq[OCFS2_IOEND_WQ_HASH_SZ];

#endif /* OCFS2_FILE_H */
#endif /* OCFS2_FILE_H */
+122 −72
Original line number Original line Diff line number Diff line
@@ -216,6 +216,7 @@ struct o2hb_region {


	struct list_head	hr_all_item;
	struct list_head	hr_all_item;
	unsigned		hr_unclean_stop:1,
	unsigned		hr_unclean_stop:1,
				hr_aborted_start:1,
				hr_item_pinned:1,
				hr_item_pinned:1,
				hr_item_dropped:1;
				hr_item_dropped:1;


@@ -254,6 +255,10 @@ struct o2hb_region {
	 * a more complete api that doesn't lead to this sort of fragility. */
	 * a more complete api that doesn't lead to this sort of fragility. */
	atomic_t		hr_steady_iterations;
	atomic_t		hr_steady_iterations;


	/* terminate o2hb thread if it does not reach steady state
	 * (hr_steady_iterations == 0) within hr_unsteady_iterations */
	atomic_t		hr_unsteady_iterations;

	char			hr_dev_name[BDEVNAME_SIZE];
	char			hr_dev_name[BDEVNAME_SIZE];


	unsigned int		hr_timeout_ms;
	unsigned int		hr_timeout_ms;
@@ -324,6 +329,10 @@ static void o2hb_write_timeout(struct work_struct *work)


static void o2hb_arm_write_timeout(struct o2hb_region *reg)
static void o2hb_arm_write_timeout(struct o2hb_region *reg)
{
{
	/* Arm writeout only after thread reaches steady state */
	if (atomic_read(&reg->hr_steady_iterations) != 0)
		return;

	mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
	mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
	     O2HB_MAX_WRITE_TIMEOUT_MS);
	     O2HB_MAX_WRITE_TIMEOUT_MS);


@@ -537,9 +546,14 @@ static int o2hb_verify_crc(struct o2hb_region *reg,
	return read == computed;
	return read == computed;
}
}


/* We want to make sure that nobody is heartbeating on top of us --
/*
 * this will help detect an invalid configuration. */
 * Compare the slot data with what we wrote in the last iteration.
static void o2hb_check_last_timestamp(struct o2hb_region *reg)
 * If the match fails, print an appropriate error message. This is to
 * detect errors like... another node heartbeating on the same slot,
 * flaky device that is losing writes, etc.
 * Returns 1 if check succeeds, 0 otherwise.
 */
static int o2hb_check_own_slot(struct o2hb_region *reg)
{
{
	struct o2hb_disk_slot *slot;
	struct o2hb_disk_slot *slot;
	struct o2hb_disk_heartbeat_block *hb_block;
	struct o2hb_disk_heartbeat_block *hb_block;
@@ -548,13 +562,13 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg)
	slot = &reg->hr_slots[o2nm_this_node()];
	slot = &reg->hr_slots[o2nm_this_node()];
	/* Don't check on our 1st timestamp */
	/* Don't check on our 1st timestamp */
	if (!slot->ds_last_time)
	if (!slot->ds_last_time)
		return;
		return 0;


	hb_block = slot->ds_raw_block;
	hb_block = slot->ds_raw_block;
	if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
	if (le64_to_cpu(hb_block->hb_seq) == slot->ds_last_time &&
	    le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
	    le64_to_cpu(hb_block->hb_generation) == slot->ds_last_generation &&
	    hb_block->hb_node == slot->ds_node_num)
	    hb_block->hb_node == slot->ds_node_num)
		return;
		return 1;


#define ERRSTR1		"Another node is heartbeating on device"
#define ERRSTR1		"Another node is heartbeating on device"
#define ERRSTR2		"Heartbeat generation mismatch on device"
#define ERRSTR2		"Heartbeat generation mismatch on device"
@@ -574,6 +588,8 @@ static void o2hb_check_last_timestamp(struct o2hb_region *reg)
	     (unsigned long long)slot->ds_last_time, hb_block->hb_node,
	     (unsigned long long)slot->ds_last_time, hb_block->hb_node,
	     (unsigned long long)le64_to_cpu(hb_block->hb_generation),
	     (unsigned long long)le64_to_cpu(hb_block->hb_generation),
	     (unsigned long long)le64_to_cpu(hb_block->hb_seq));
	     (unsigned long long)le64_to_cpu(hb_block->hb_seq));

	return 0;
}
}


static inline void o2hb_prepare_block(struct o2hb_region *reg,
static inline void o2hb_prepare_block(struct o2hb_region *reg,
@@ -719,17 +735,24 @@ static void o2hb_shutdown_slot(struct o2hb_disk_slot *slot)
	o2nm_node_put(node);
	o2nm_node_put(node);
}
}


static void o2hb_set_quorum_device(struct o2hb_region *reg,
static void o2hb_set_quorum_device(struct o2hb_region *reg)
				   struct o2hb_disk_slot *slot)
{
{
	assert_spin_locked(&o2hb_live_lock);

	if (!o2hb_global_heartbeat_active())
	if (!o2hb_global_heartbeat_active())
		return;
		return;


	if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
	/* Prevent race with o2hb_heartbeat_group_drop_item() */
	if (kthread_should_stop())
		return;

	/* Tag region as quorum only after thread reaches steady state */
	if (atomic_read(&reg->hr_steady_iterations) != 0)
		return;
		return;


	spin_lock(&o2hb_live_lock);

	if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
		goto unlock;

	/*
	/*
	 * A region can be added to the quorum only when it sees all
	 * A region can be added to the quorum only when it sees all
	 * live nodes heartbeat on it. In other words, the region has been
	 * live nodes heartbeat on it. In other words, the region has been
@@ -737,13 +760,10 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
	 */
	 */
	if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
	if (memcmp(reg->hr_live_node_bitmap, o2hb_live_node_bitmap,
		   sizeof(o2hb_live_node_bitmap)))
		   sizeof(o2hb_live_node_bitmap)))
		return;
		goto unlock;

	if (slot->ds_changed_samples < O2HB_LIVE_THRESHOLD)
		return;


	printk(KERN_NOTICE "o2hb: Region %s is now a quorum device\n",
	printk(KERN_NOTICE "o2hb: Region %s (%s) is now a quorum device\n",
	       config_item_name(&reg->hr_item));
	       config_item_name(&reg->hr_item), reg->hr_dev_name);


	set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
	set_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);


@@ -754,6 +774,8 @@ static void o2hb_set_quorum_device(struct o2hb_region *reg,
	if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
	if (o2hb_pop_count(&o2hb_quorum_region_bitmap,
			   O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
			   O2NM_MAX_REGIONS) > O2HB_PIN_CUT_OFF)
		o2hb_region_unpin(NULL);
		o2hb_region_unpin(NULL);
unlock:
	spin_unlock(&o2hb_live_lock);
}
}


static int o2hb_check_slot(struct o2hb_region *reg,
static int o2hb_check_slot(struct o2hb_region *reg,
@@ -925,8 +947,6 @@ fire_callbacks:
		slot->ds_equal_samples = 0;
		slot->ds_equal_samples = 0;
	}
	}
out:
out:
	o2hb_set_quorum_device(reg, slot);

	spin_unlock(&o2hb_live_lock);
	spin_unlock(&o2hb_live_lock);


	o2hb_run_event_list(&event);
	o2hb_run_event_list(&event);
@@ -957,7 +977,8 @@ static int o2hb_highest_node(unsigned long *nodes,


static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
{
{
	int i, ret, highest_node, change = 0;
	int i, ret, highest_node;
	int membership_change = 0, own_slot_ok = 0;
	unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long configured_nodes[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
	struct o2hb_bio_wait_ctxt write_wc;
	struct o2hb_bio_wait_ctxt write_wc;
@@ -966,7 +987,7 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
				       sizeof(configured_nodes));
				       sizeof(configured_nodes));
	if (ret) {
	if (ret) {
		mlog_errno(ret);
		mlog_errno(ret);
		return ret;
		goto bail;
	}
	}


	/*
	/*
@@ -982,8 +1003,9 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)


	highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
	highest_node = o2hb_highest_node(configured_nodes, O2NM_MAX_NODES);
	if (highest_node >= O2NM_MAX_NODES) {
	if (highest_node >= O2NM_MAX_NODES) {
		mlog(ML_NOTICE, "ocfs2_heartbeat: no configured nodes found!\n");
		mlog(ML_NOTICE, "o2hb: No configured nodes found!\n");
		return -EINVAL;
		ret = -EINVAL;
		goto bail;
	}
	}


	/* No sense in reading the slots of nodes that don't exist
	/* No sense in reading the slots of nodes that don't exist
@@ -993,29 +1015,27 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
	ret = o2hb_read_slots(reg, highest_node + 1);
	ret = o2hb_read_slots(reg, highest_node + 1);
	if (ret < 0) {
	if (ret < 0) {
		mlog_errno(ret);
		mlog_errno(ret);
		return ret;
		goto bail;
	}
	}


	/* With an up to date view of the slots, we can check that no
	/* With an up to date view of the slots, we can check that no
	 * other node has been improperly configured to heartbeat in
	 * other node has been improperly configured to heartbeat in
	 * our slot. */
	 * our slot. */
	o2hb_check_last_timestamp(reg);
	own_slot_ok = o2hb_check_own_slot(reg);


	/* fill in the proper info for our next heartbeat */
	/* fill in the proper info for our next heartbeat */
	o2hb_prepare_block(reg, reg->hr_generation);
	o2hb_prepare_block(reg, reg->hr_generation);


	/* And fire off the write. Note that we don't wait on this I/O
	 * until later. */
	ret = o2hb_issue_node_write(reg, &write_wc);
	ret = o2hb_issue_node_write(reg, &write_wc);
	if (ret < 0) {
	if (ret < 0) {
		mlog_errno(ret);
		mlog_errno(ret);
		return ret;
		goto bail;
	}
	}


	i = -1;
	i = -1;
	while((i = find_next_bit(configured_nodes,
	while((i = find_next_bit(configured_nodes,
				 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
				 O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES) {
		change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
		membership_change |= o2hb_check_slot(reg, &reg->hr_slots[i]);
	}
	}


	/*
	/*
@@ -1030,18 +1050,39 @@ static int o2hb_do_disk_heartbeat(struct o2hb_region *reg)
		 * disk */
		 * disk */
		mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
		mlog(ML_ERROR, "Write error %d on device \"%s\"\n",
		     write_wc.wc_error, reg->hr_dev_name);
		     write_wc.wc_error, reg->hr_dev_name);
		return write_wc.wc_error;
		ret = write_wc.wc_error;
		goto bail;
	}
	}


	/* Skip disarming the timeout if own slot has stale/bad data */
	if (own_slot_ok) {
		o2hb_set_quorum_device(reg);
		o2hb_arm_write_timeout(reg);
		o2hb_arm_write_timeout(reg);
	}


bail:
	/* let the person who launched us know when things are steady */
	/* let the person who launched us know when things are steady */
	if (!change && (atomic_read(&reg->hr_steady_iterations) != 0)) {
	if (atomic_read(&reg->hr_steady_iterations) != 0) {
		if (!ret && own_slot_ok && !membership_change) {
			if (atomic_dec_and_test(&reg->hr_steady_iterations))
			if (atomic_dec_and_test(&reg->hr_steady_iterations))
				wake_up(&o2hb_steady_queue);
				wake_up(&o2hb_steady_queue);
		}
		}
	}


	return 0;
	if (atomic_read(&reg->hr_steady_iterations) != 0) {
		if (atomic_dec_and_test(&reg->hr_unsteady_iterations)) {
			printk(KERN_NOTICE "o2hb: Unable to stabilize "
			       "heartbeart on region %s (%s)\n",
			       config_item_name(&reg->hr_item),
			       reg->hr_dev_name);
			atomic_set(&reg->hr_steady_iterations, 0);
			reg->hr_aborted_start = 1;
			wake_up(&o2hb_steady_queue);
			ret = -EIO;
		}
	}

	return ret;
}
}


/* Subtract b from a, storing the result in a. a *must* have a larger
/* Subtract b from a, storing the result in a. a *must* have a larger
@@ -1095,7 +1136,8 @@ static int o2hb_thread(void *data)
	/* Pin node */
	/* Pin node */
	o2nm_depend_this_node();
	o2nm_depend_this_node();


	while (!kthread_should_stop() && !reg->hr_unclean_stop) {
	while (!kthread_should_stop() &&
	       !reg->hr_unclean_stop && !reg->hr_aborted_start) {
		/* We track the time spent inside
		/* We track the time spent inside
		 * o2hb_do_disk_heartbeat so that we avoid more than
		 * o2hb_do_disk_heartbeat so that we avoid more than
		 * hr_timeout_ms between disk writes. On busy systems
		 * hr_timeout_ms between disk writes. On busy systems
@@ -1103,10 +1145,7 @@ static int o2hb_thread(void *data)
		 * likely to time itself out. */
		 * likely to time itself out. */
		do_gettimeofday(&before_hb);
		do_gettimeofday(&before_hb);


		i = 0;
		do {
		ret = o2hb_do_disk_heartbeat(reg);
		ret = o2hb_do_disk_heartbeat(reg);
		} while (ret && ++i < 2);


		do_gettimeofday(&after_hb);
		do_gettimeofday(&after_hb);
		elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
		elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
@@ -1117,7 +1156,8 @@ static int o2hb_thread(void *data)
		     after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
		     after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
		     elapsed_msec);
		     elapsed_msec);


		if (elapsed_msec < reg->hr_timeout_ms) {
		if (!kthread_should_stop() &&
		    elapsed_msec < reg->hr_timeout_ms) {
			/* the kthread api has blocked signals for us so no
			/* the kthread api has blocked signals for us so no
			 * need to record the return value. */
			 * need to record the return value. */
			msleep_interruptible(reg->hr_timeout_ms - elapsed_msec);
			msleep_interruptible(reg->hr_timeout_ms - elapsed_msec);
@@ -1134,20 +1174,20 @@ static int o2hb_thread(void *data)
	 * to timeout on this region when we could just as easily
	 * to timeout on this region when we could just as easily
	 * write a clear generation - thus indicating to them that
	 * write a clear generation - thus indicating to them that
	 * this node has left this region.
	 * this node has left this region.
	 *
	 */
	 * XXX: Should we skip this on unclean_stop? */
	if (!reg->hr_unclean_stop && !reg->hr_aborted_start) {
		o2hb_prepare_block(reg, 0);
		o2hb_prepare_block(reg, 0);
		ret = o2hb_issue_node_write(reg, &write_wc);
		ret = o2hb_issue_node_write(reg, &write_wc);
	if (ret == 0) {
		if (ret == 0)
			o2hb_wait_on_io(reg, &write_wc);
			o2hb_wait_on_io(reg, &write_wc);
	} else {
		else
			mlog_errno(ret);
			mlog_errno(ret);
	}
	}


	/* Unpin node */
	/* Unpin node */
	o2nm_undepend_this_node();
	o2nm_undepend_this_node();


	mlog(ML_HEARTBEAT|ML_KTHREAD, "hb thread exiting\n");
	mlog(ML_HEARTBEAT|ML_KTHREAD, "o2hb thread exiting\n");


	return 0;
	return 0;
}
}
@@ -1158,6 +1198,7 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)
	struct o2hb_debug_buf *db = inode->i_private;
	struct o2hb_debug_buf *db = inode->i_private;
	struct o2hb_region *reg;
	struct o2hb_region *reg;
	unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	unsigned long lts;
	char *buf = NULL;
	char *buf = NULL;
	int i = -1;
	int i = -1;
	int out = 0;
	int out = 0;
@@ -1194,9 +1235,11 @@ static int o2hb_debug_open(struct inode *inode, struct file *file)


	case O2HB_DB_TYPE_REGION_ELAPSED_TIME:
	case O2HB_DB_TYPE_REGION_ELAPSED_TIME:
		reg = (struct o2hb_region *)db->db_data;
		reg = (struct o2hb_region *)db->db_data;
		out += snprintf(buf + out, PAGE_SIZE - out, "%u\n",
		lts = reg->hr_last_timeout_start;
				jiffies_to_msecs(jiffies -
		/* If 0, it has never been set before */
						 reg->hr_last_timeout_start));
		if (lts)
			lts = jiffies_to_msecs(jiffies - lts);
		out += snprintf(buf + out, PAGE_SIZE - out, "%lu\n", lts);
		goto done;
		goto done;


	case O2HB_DB_TYPE_REGION_PINNED:
	case O2HB_DB_TYPE_REGION_PINNED:
@@ -1426,6 +1469,8 @@ static void o2hb_region_release(struct config_item *item)
	struct page *page;
	struct page *page;
	struct o2hb_region *reg = to_o2hb_region(item);
	struct o2hb_region *reg = to_o2hb_region(item);


	mlog(ML_HEARTBEAT, "hb region release (%s)\n", reg->hr_dev_name);

	if (reg->hr_tmp_block)
	if (reg->hr_tmp_block)
		kfree(reg->hr_tmp_block);
		kfree(reg->hr_tmp_block);


@@ -1792,7 +1837,10 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
			live_threshold <<= 1;
			live_threshold <<= 1;
		spin_unlock(&o2hb_live_lock);
		spin_unlock(&o2hb_live_lock);
	}
	}
	atomic_set(&reg->hr_steady_iterations, live_threshold + 1);
	++live_threshold;
	atomic_set(&reg->hr_steady_iterations, live_threshold);
	/* unsteady_iterations is double the steady_iterations */
	atomic_set(&reg->hr_unsteady_iterations, (live_threshold << 1));


	hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
	hb_task = kthread_run(o2hb_thread, reg, "o2hb-%s",
			      reg->hr_item.ci_name);
			      reg->hr_item.ci_name);
@@ -1809,14 +1857,12 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
	ret = wait_event_interruptible(o2hb_steady_queue,
	ret = wait_event_interruptible(o2hb_steady_queue,
				atomic_read(&reg->hr_steady_iterations) == 0);
				atomic_read(&reg->hr_steady_iterations) == 0);
	if (ret) {
	if (ret) {
		/* We got interrupted (hello ptrace!).  Clean up */
		atomic_set(&reg->hr_steady_iterations, 0);
		spin_lock(&o2hb_live_lock);
		reg->hr_aborted_start = 1;
		hb_task = reg->hr_task;
	}
		reg->hr_task = NULL;
		spin_unlock(&o2hb_live_lock);


		if (hb_task)
	if (reg->hr_aborted_start) {
			kthread_stop(hb_task);
		ret = -EIO;
		goto out;
		goto out;
	}
	}


@@ -1833,8 +1879,8 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
		ret = -EIO;
		ret = -EIO;


	if (hb_task && o2hb_global_heartbeat_active())
	if (hb_task && o2hb_global_heartbeat_active())
		printk(KERN_NOTICE "o2hb: Heartbeat started on region %s\n",
		printk(KERN_NOTICE "o2hb: Heartbeat started on region %s (%s)\n",
		       config_item_name(&reg->hr_item));
		       config_item_name(&reg->hr_item), reg->hr_dev_name);


out:
out:
	if (filp)
	if (filp)
@@ -2092,13 +2138,6 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,


	/* stop the thread when the user removes the region dir */
	/* stop the thread when the user removes the region dir */
	spin_lock(&o2hb_live_lock);
	spin_lock(&o2hb_live_lock);
	if (o2hb_global_heartbeat_active()) {
		clear_bit(reg->hr_region_num, o2hb_region_bitmap);
		clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
		if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
			quorum_region = 1;
		clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
	}
	hb_task = reg->hr_task;
	hb_task = reg->hr_task;
	reg->hr_task = NULL;
	reg->hr_task = NULL;
	reg->hr_item_dropped = 1;
	reg->hr_item_dropped = 1;
@@ -2107,19 +2146,30 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
	if (hb_task)
	if (hb_task)
		kthread_stop(hb_task);
		kthread_stop(hb_task);


	if (o2hb_global_heartbeat_active()) {
		spin_lock(&o2hb_live_lock);
		clear_bit(reg->hr_region_num, o2hb_region_bitmap);
		clear_bit(reg->hr_region_num, o2hb_live_region_bitmap);
		if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
			quorum_region = 1;
		clear_bit(reg->hr_region_num, o2hb_quorum_region_bitmap);
		spin_unlock(&o2hb_live_lock);
		printk(KERN_NOTICE "o2hb: Heartbeat %s on region %s (%s)\n",
		       ((atomic_read(&reg->hr_steady_iterations) == 0) ?
			"stopped" : "start aborted"), config_item_name(item),
		       reg->hr_dev_name);
	}

	/*
	/*
	 * If we're racing a dev_write(), we need to wake them.  They will
	 * If we're racing a dev_write(), we need to wake them.  They will
	 * check reg->hr_task
	 * check reg->hr_task
	 */
	 */
	if (atomic_read(&reg->hr_steady_iterations) != 0) {
	if (atomic_read(&reg->hr_steady_iterations) != 0) {
		reg->hr_aborted_start = 1;
		atomic_set(&reg->hr_steady_iterations, 0);
		atomic_set(&reg->hr_steady_iterations, 0);
		wake_up(&o2hb_steady_queue);
		wake_up(&o2hb_steady_queue);
	}
	}


	if (o2hb_global_heartbeat_active())
		printk(KERN_NOTICE "o2hb: Heartbeat stopped on region %s\n",
		       config_item_name(&reg->hr_item));

	config_item_put(item);
	config_item_put(item);


	if (!o2hb_global_heartbeat_active() || !quorum_region)
	if (!o2hb_global_heartbeat_active() || !quorum_region)
+69 −33
Original line number Original line Diff line number Diff line
@@ -47,6 +47,7 @@
#define SC_DEBUG_NAME		"sock_containers"
#define SC_DEBUG_NAME		"sock_containers"
#define NST_DEBUG_NAME		"send_tracking"
#define NST_DEBUG_NAME		"send_tracking"
#define STATS_DEBUG_NAME	"stats"
#define STATS_DEBUG_NAME	"stats"
#define NODES_DEBUG_NAME	"connected_nodes"


#define SHOW_SOCK_CONTAINERS	0
#define SHOW_SOCK_CONTAINERS	0
#define SHOW_SOCK_STATS		1
#define SHOW_SOCK_STATS		1
@@ -55,6 +56,7 @@ static struct dentry *o2net_dentry;
static struct dentry *sc_dentry;
static struct dentry *sc_dentry;
static struct dentry *nst_dentry;
static struct dentry *nst_dentry;
static struct dentry *stats_dentry;
static struct dentry *stats_dentry;
static struct dentry *nodes_dentry;


static DEFINE_SPINLOCK(o2net_debug_lock);
static DEFINE_SPINLOCK(o2net_debug_lock);


@@ -491,53 +493,87 @@ static const struct file_operations sc_seq_fops = {
	.release = sc_fop_release,
	.release = sc_fop_release,
};
};


int o2net_debugfs_init(void)
static int o2net_fill_bitmap(char *buf, int len)
{
{
	o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
	unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
	if (!o2net_dentry) {
	int i = -1, out = 0;
		mlog_errno(-ENOMEM);
		goto bail;
	}


	nst_dentry = debugfs_create_file(NST_DEBUG_NAME, S_IFREG|S_IRUSR,
	o2net_fill_node_map(map, sizeof(map));
					 o2net_dentry, NULL,
					 &nst_seq_fops);
	if (!nst_dentry) {
		mlog_errno(-ENOMEM);
		goto bail;
	}


	sc_dentry = debugfs_create_file(SC_DEBUG_NAME, S_IFREG|S_IRUSR,
	while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
					o2net_dentry, NULL,
		out += snprintf(buf + out, PAGE_SIZE - out, "%d ", i);
					&sc_seq_fops);
	out += snprintf(buf + out, PAGE_SIZE - out, "\n");
	if (!sc_dentry) {

		mlog_errno(-ENOMEM);
	return out;
		goto bail;
}
}


	stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, S_IFREG|S_IRUSR,
static int nodes_fop_open(struct inode *inode, struct file *file)
					   o2net_dentry, NULL,
{
					   &stats_seq_fops);
	char *buf;
	if (!stats_dentry) {

		mlog_errno(-ENOMEM);
	buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
		goto bail;
	if (!buf)
		return -ENOMEM;

	i_size_write(inode, o2net_fill_bitmap(buf, PAGE_SIZE));

	file->private_data = buf;

	return 0;
}
}


static int o2net_debug_release(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
	return 0;
bail:
	debugfs_remove(stats_dentry);
	debugfs_remove(sc_dentry);
	debugfs_remove(nst_dentry);
	debugfs_remove(o2net_dentry);
	return -ENOMEM;
}
}


/*
 * read() handler for the o2net debugfs text files.
 *
 * The source buffer is whatever open() stored in file->private_data and
 * the number of valid bytes in it is the inode's i_size (nodes_fop_open()
 * sets both).  The copy to user space, bounded by nbytes and positioned
 * by *ppos, is delegated to simple_read_from_buffer(), whose return
 * value is passed straight back to the caller.
 */
static ssize_t o2net_debug_read(struct file *file, char __user *buf,
				size_t nbytes, loff_t *ppos)
{
	const void *kbuf = file->private_data;
	loff_t avail = i_size_read(file->f_mapping->host);

	return simple_read_from_buffer(buf, nbytes, ppos, kbuf, avail);
}

/*
 * File operations for the "connected_nodes" debugfs file: open fills a
 * buffer via nodes_fop_open(), read serves it via o2net_debug_read(),
 * and release frees it via o2net_debug_release().
 */
static const struct file_operations nodes_fops = {
	.llseek		= generic_file_llseek,
	.read		= o2net_debug_read,
	.open		= nodes_fop_open,
	.release	= o2net_debug_release,
};

void o2net_debugfs_exit(void)
void o2net_debugfs_exit(void)
{
{
	debugfs_remove(nodes_dentry);
	debugfs_remove(stats_dentry);
	debugfs_remove(stats_dentry);
	debugfs_remove(sc_dentry);
	debugfs_remove(sc_dentry);
	debugfs_remove(nst_dentry);
	debugfs_remove(nst_dentry);
	debugfs_remove(o2net_dentry);
	debugfs_remove(o2net_dentry);
}
}


/*
 * Create the o2net debugfs directory and the files inside it
 * (send tracking, sock containers, stats, connected nodes).
 *
 * Each creation step checks its own result and stops at the first
 * failure, so a later step never runs on the strength of a dentry
 * pointer left over in one of the file-scope statics.
 *
 * Returns 0 when every entry was created.  On any failure,
 * o2net_debugfs_exit() is called to remove whatever exists, the error
 * is logged, and -ENOMEM is returned.
 */
int o2net_debugfs_init(void)
{
	mode_t mode = S_IFREG|S_IRUSR;

	o2net_dentry = debugfs_create_dir(O2NET_DEBUG_DIR, NULL);
	if (!o2net_dentry)
		goto fail;

	nst_dentry = debugfs_create_file(NST_DEBUG_NAME, mode,
					 o2net_dentry, NULL, &nst_seq_fops);
	if (!nst_dentry)
		goto fail;

	sc_dentry = debugfs_create_file(SC_DEBUG_NAME, mode,
					o2net_dentry, NULL, &sc_seq_fops);
	if (!sc_dentry)
		goto fail;

	stats_dentry = debugfs_create_file(STATS_DEBUG_NAME, mode,
					   o2net_dentry, NULL, &stats_seq_fops);
	if (!stats_dentry)
		goto fail;

	nodes_dentry = debugfs_create_file(NODES_DEBUG_NAME, mode,
					   o2net_dentry, NULL, &nodes_fops);
	if (!nodes_dentry)
		goto fail;

	return 0;

fail:
	o2net_debugfs_exit();
	mlog_errno(-ENOMEM);
	return -ENOMEM;
}

#endif	/* CONFIG_DEBUG_FS */
#endif	/* CONFIG_DEBUG_FS */
Loading