Merge branch 'for-3.7/core' of git://git.kernel.dk/linux-block (ce40be7a) · Commits · e / devices / android_kernel_xiaomi_markw

Documentation/ABI/testing/sysfs-block

+14 −0

Original line number	Diff line number	Diff line
		@@ -206,3 +206,17 @@ Description:
		when a discarded area is read the discard_zeroes_data
		parameter will be set to one. Otherwise it will be 0 and
		the result of reading a discarded area is undefined.

		What: /sys/block/<disk>/queue/write_same_max_bytes
		Date: January 2012
		Contact: Martin K. Petersen <martin.petersen@oracle.com>
		Description:
		Some devices support a write same operation in which a
		single data block can be written to a range of several
		contiguous blocks on storage. This can be used to wipe
		areas on disk or to initialize drives in a RAID
		configuration. write_same_max_bytes indicates how many
		bytes can be written in a single write same command. If
		write_same_max_bytes is 0, write same is not supported
		by the device.

Documentation/block/biodoc.txt

+0 −5

Original line number	Diff line number	Diff line
		@@ -465,7 +465,6 @@ struct bio {
		bio_end_io_t *bi_end_io; /* bi_end_io (bio) */
		atomic_t bi_cnt; /* pin count: free when it hits zero */
		void *bi_private;
		bio_destructor_t *bi_destructor; /* bi_destructor (bio) */
		};

		With this multipage bio design:
		@@ -647,10 +646,6 @@ for a non-clone bio. There are the 6 pools setup for different size biovecs,
		so bio_alloc(gfp_mask, nr_iovecs) will allocate a vec_list of the
		given size from these slabs.

		The bi_destructor() routine takes into account the possibility of the bio
		having originated from a different source (see later discussions on
		n/w to block transfers and kvec_cb)

		The bio_get() routine may be used to hold an extra reference on a bio prior
		to i/o submission, if the bio fields are likely to be accessed after the
		i/o is issued (since the bio may otherwise get freed in case i/o completion

Documentation/percpu-rw-semaphore.txt

0 → 100644

+27 −0

Original line number	Diff line number	Diff line
		Percpu rw semaphores
		--------------------

		Percpu rw semaphores are a new read-write semaphore design that is
		optimized for locking for reading.

		The problem with traditional read-write semaphores is that when multiple
		cores take the lock for reading, the cache line containing the semaphore
		bounces between the L1 caches of the cores, causing performance
		degradation.

		Locking for reading is very fast: it uses RCU and avoids any atomic
		instruction in the lock and unlock path. On the other hand, locking for
		writing is very expensive: it calls synchronize_rcu(), which can take
		hundreds of milliseconds.

		The lock is declared with the "struct percpu_rw_semaphore" type.
		The lock is initialized with percpu_init_rwsem(), which returns 0 on
		success and -ENOMEM on allocation failure.
		The lock must be freed with percpu_free_rwsem() to avoid a memory leak.

		The lock is taken and released for reading with percpu_down_read() and
		percpu_up_read(), and for writing with percpu_down_write() and
		percpu_up_write().

		The idea of using RCU for optimized rw-lock was introduced by
		Eric Dumazet <eric.dumazet@gmail.com>.
		The code was written by Mikulas Patocka <mpatocka@redhat.com>

block/blk-core.c

+24 −27

Original line number	Diff line number	Diff line
		@@ -606,8 +606,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
		/*
		* A queue starts its life with bypass turned on to avoid
		* unnecessary bypass on/off overhead and nasty surprises during
		* init. The initial bypass will be finished at the end of
		* blk_init_allocated_queue().
		* init. The initial bypass will be finished when the queue is
		* registered by blk_register_queue().
		*/
		q->bypass_depth = 1;
		__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
		@@ -694,7 +694,7 @@ blk_init_allocated_queue(struct request_queue q, request_fn_proc rfn,
		q->request_fn = rfn;
		q->prep_rq_fn = NULL;
		q->unprep_rq_fn = NULL;
		q->queue_flags = QUEUE_FLAG_DEFAULT;
		q->queue_flags \|= QUEUE_FLAG_DEFAULT;

		/* Override internal queue lock with supplied lock pointer */
		if (lock)
		@@ -710,11 +710,6 @@ blk_init_allocated_queue(struct request_queue q, request_fn_proc rfn,
		/* init elevator */
		if (elevator_init(q, NULL))
		return NULL;

		blk_queue_congestion_threshold(q);

		/* all done, end the initial bypass */
		blk_queue_bypass_end(q);
		return q;
		}
		EXPORT_SYMBOL(blk_init_allocated_queue);
		@@ -1657,7 +1652,7 @@ generic_make_request_checks(struct bio *bio)
		goto end_io;
		}

		if (unlikely(!(bio->bi_rw & REQ_DISCARD) &&
		if (likely(bio_is_rw(bio) &&
		nr_sectors > queue_max_hw_sectors(q))) {
		printk(KERN_ERR "bio too big device %s (%u > %u)\n",
		bdevname(bio->bi_bdev, b),
		@@ -1699,8 +1694,12 @@ generic_make_request_checks(struct bio *bio)

		if ((bio->bi_rw & REQ_DISCARD) &&
		(!blk_queue_discard(q) \|\|
		((bio->bi_rw & REQ_SECURE) &&
		!blk_queue_secdiscard(q)))) {
		((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) {
		err = -EOPNOTSUPP;
		goto end_io;
		}

		if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) {
		err = -EOPNOTSUPP;
		goto end_io;
		}
		@@ -1810,15 +1809,20 @@ EXPORT_SYMBOL(generic_make_request);
		*/
		void submit_bio(int rw, struct bio *bio)
		{
		int count = bio_sectors(bio);

		bio->bi_rw \|= rw;

		/*
		* If it's a regular read/write or a barrier with data attached,
		* go through the normal accounting stuff before submission.
		*/
		if (bio_has_data(bio) && !(rw & REQ_DISCARD)) {
		if (bio_has_data(bio)) {
		unsigned int count;

		if (unlikely(rw & REQ_WRITE_SAME))
		count = bdev_logical_block_size(bio->bi_bdev) >> 9;
		else
		count = bio_sectors(bio);

		if (rw & WRITE) {
		count_vm_events(PGPGOUT, count);
		} else {
		@@ -1864,11 +1868,10 @@ EXPORT_SYMBOL(submit_bio);
		*/
		int blk_rq_check_limits(struct request_queue q, struct request rq)
		{
		if (rq->cmd_flags & REQ_DISCARD)
		if (!rq_mergeable(rq))
		return 0;

		if (blk_rq_sectors(rq) > queue_max_sectors(q) \|\|
		blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) {
		if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
		printk(KERN_ERR "%s: over max size limit.\n", __func__);
		return -EIO;
		}
		@@ -2340,7 +2343,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes)
		req->buffer = bio_data(req->bio);

		/* update sector only for requests with clear definition of sector */
		if (req->cmd_type == REQ_TYPE_FS \|\| (req->cmd_flags & REQ_DISCARD))
		if (req->cmd_type == REQ_TYPE_FS)
		req->__sector += total_bytes >> 9;

		/* mixed attributes always follow the first bio */
		@@ -2781,16 +2784,10 @@ int blk_rq_prep_clone(struct request rq, struct request rq_src,
		blk_rq_init(NULL, rq);

		__rq_for_each_bio(bio_src, rq_src) {
		bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs);
		bio = bio_clone_bioset(bio_src, gfp_mask, bs);
		if (!bio)
		goto free_and_out;

		__bio_clone(bio, bio_src);

		if (bio_integrity(bio_src) &&
		bio_integrity_clone(bio, bio_src, gfp_mask, bs))
		goto free_and_out;

		if (bio_ctr && bio_ctr(bio, bio_src, data))
		goto free_and_out;

		@@ -2807,7 +2804,7 @@ int blk_rq_prep_clone(struct request rq, struct request rq_src,

		free_and_out:
		if (bio)
		bio_free(bio, bs);
		bio_put(bio);
		blk_rq_unprep_clone(rq);

		return -ENOMEM;

block/blk-lib.c

+103 −1

Original line number	Diff line number	Diff line
		@@ -129,6 +129,80 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		}
		EXPORT_SYMBOL(blkdev_issue_discard);

		/**
		 * blkdev_issue_write_same - queue a write same operation
		 * @bdev:	target blockdev
		 * @sector:	start sector
		 * @nr_sects:	number of sectors to write
		 * @gfp_mask:	memory allocation flags (for bio_alloc)
		 * @page:	page containing data to write
		 *
		 * Description:
		 *    Issue a write same request for the sectors in question.  The range
		 *    is split into bios of at most max_write_same_sectors each (further
		 *    capped so that the byte count of one bio cannot wrap), every bio
		 *    carries a single logical block of payload taken from @page, and the
		 *    function waits until all submitted bios have completed.
		 *
		 * Return: 0 on success, -ENXIO if @bdev has no request queue,
		 * -EOPNOTSUPP if the queue reports a write same limit of zero, -ENOMEM
		 * if a bio could not be allocated, or -ENOTSUPP if BIO_UPTODATE is no
		 * longer set in the batch flags once all bios have finished (presumably
		 * cleared by bio_batch_end_io() on I/O error -- confirm against that
		 * helper).  The -ENOTSUPP check runs last and therefore takes precedence
		 * over an earlier -ENOMEM.
		 */
		int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
					    sector_t nr_sects, gfp_t gfp_mask,
					    struct page *page)
		{
			DECLARE_COMPLETION_ONSTACK(wait);
			struct request_queue *q = bdev_get_queue(bdev);
			unsigned int max_write_same_sectors;
			unsigned int block_sectors;
			unsigned int sectors_cap;
			struct bio_batch bb;
			struct bio *bio;
			int ret = 0;

			if (!q)
				return -ENXIO;

			max_write_same_sectors = q->limits.max_write_same_sectors;

			if (max_write_same_sectors == 0)
				return -EOPNOTSUPP;

			/*
			 * The per-bio byte count below is computed as a sector count
			 * shifted left by 9 in unsigned int arithmetic, so a limit larger
			 * than ~0U >> 9 sectors would silently wrap.  Cap the chunk size,
			 * rounding the cap down to a whole number of logical blocks so a
			 * capped chunk never ends in the middle of a block.
			 */
			sectors_cap = ~0U >> 9;
			block_sectors = bdev_logical_block_size(bdev) >> 9;
			if (block_sectors > 1)
				sectors_cap -= sectors_cap % block_sectors;
			if (max_write_same_sectors > sectors_cap)
				max_write_same_sectors = sectors_cap;

			/* Start at 1 so completions cannot drop the count to zero early. */
			atomic_set(&bb.done, 1);
			bb.flags = 1 << BIO_UPTODATE;
			bb.wait = &wait;

			while (nr_sects) {
				bio = bio_alloc(gfp_mask, 1);
				if (!bio) {
					ret = -ENOMEM;
					break;
				}

				bio->bi_sector = sector;
				bio->bi_end_io = bio_batch_end_io;
				bio->bi_bdev = bdev;
				bio->bi_private = &bb;
				/* One vector: a single logical block of payload from @page. */
				bio->bi_vcnt = 1;
				bio->bi_io_vec->bv_page = page;
				bio->bi_io_vec->bv_offset = 0;
				bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);

				/* bi_size describes the range to cover, not the payload size. */
				if (nr_sects > max_write_same_sectors) {
					bio->bi_size = max_write_same_sectors << 9;
					nr_sects -= max_write_same_sectors;
					sector += max_write_same_sectors;
				} else {
					bio->bi_size = nr_sects << 9;
					nr_sects = 0;
				}

				atomic_inc(&bb.done);
				submit_bio(REQ_WRITE | REQ_WRITE_SAME, bio);
			}

			/* Wait for bios in-flight */
			if (!atomic_dec_and_test(&bb.done))
				wait_for_completion(&wait);

			if (!test_bit(BIO_UPTODATE, &bb.flags))
				ret = -ENOTSUPP;

			return ret;
		}
		EXPORT_SYMBOL(blkdev_issue_write_same);

		/**
		* blkdev_issue_zeroout - generate a number of zero-filled write bios
		* @bdev: blockdev to issue
		@@ -140,7 +214,7 @@ EXPORT_SYMBOL(blkdev_issue_discard);
		* Generate and issue a number of bios with zero-filled pages.
		*/

		int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask)
		{
		int ret;
		@@ -190,4 +264,32 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,

		return ret;
		}

		/**
		 * blkdev_issue_zeroout - zero-fill a block range
		 * @bdev:	blockdev to write
		 * @sector:	start sector
		 * @nr_sects:	number of sectors to write
		 * @gfp_mask:	memory allocation flags (for bio_alloc)
		 *
		 * Description:
		 *  Zero-fill the given range.  When bdev_write_same() reports support
		 *  for @bdev, a write same of ZERO_PAGE(0) is attempted first.  If that
		 *  attempt fails, an error is logged and the range is zeroed through
		 *  __blkdev_issue_zeroout() instead; the same fallback is used directly
		 *  when write same is not supported.
		 *
		 * Return: 0 if the write same attempt succeeded, otherwise whatever
		 * __blkdev_issue_zeroout() returns.
		 */

		int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
					 sector_t nr_sects, gfp_t gfp_mask)
		{
			if (bdev_write_same(bdev)) {
				/* Plain char: the buffer is filled by bdevname() and printed with %s. */
				char bdn[BDEVNAME_SIZE];

				if (!blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
							     ZERO_PAGE(0)))
					return 0;

				/* Any write same failure is non-fatal: fall through to manual zeroing. */
				bdevname(bdev, bdn);
				pr_err("%s: WRITE SAME failed. Manually zeroing.\n", bdn);
			}

			return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
		}
		EXPORT_SYMBOL(blkdev_issue_zeroout);